LOADING REQUIREMENTS

PACKAGES

ENSEMBL

# ENSEMBL DATABASE
# v102 is for the latest mm10 version
# ensembl.v102 <- useMart(host = "https://nov2020.archive.ensembl.org",
#                        biomart = "ENSEMBL_MART_ENSEMBL",
#                        dataset = "mmusculus_gene_ensembl")

COLOR PALETTE

palette_1 = list(red = "#E9002D",
                 amber = "#FFAA00",
                 green = "#00B000")
palette_2 = list(red = "#FF1F5B",
                 green = "#00CD6C",
                 blue = "#009ADE",
                 purple = "#AF58BA",
                 yellow = "#FFC61E",
                 orange = "#F28522",
                 grey = "#A0B1BA",
                 brown = "#A6761D")
palette_3 = list(grey1 = "#a0b1ba",
                 grey2 = "#c5d0d5",
                 grey3 = "#eceff1")

colorListLoop <-  c(palette_3[["grey3"]], palette_3[["grey2"]], palette_3[["grey1"]],
                     "#C5E1EF", "#6CB0D6", "#226E9C",
                     "#06592A", 
                     "#FED976", "#FD8D3C", "#E31A1C")
colorListPromoter <- c(palette_3[["grey3"]],"#E88587", "#E31A1C")
colorListEnhancer <- c(palette_3[["grey3"]],"#87AFC7", "#226E9C")
colorListStructure <- c(palette_3[["grey3"]], palette_3[["grey2"]], palette_3[["grey1"]])

DIR LIST

# 
# loopDir <- here("../..", "result", "loop", "chromo_detect")
# scoreDir <- here("../..", "result", "loop", "chromo_quantify")
# consensusDir <- here("../..", "result", "loop", "consensus")
# figDir <- here("../..", "figure", "loop")
# refDir <- here("../..", "reference")

FIGURE PARAMETERS

library(colorspace)

fontType <- "Helvetica"

fontSizeL <- 10 # pt
fontSizeM <- 8
fontSizeS <- 6

lineThick <- 0.75 # pt
lineMedium <- 0.5
lineThin <- 0.25

panelUnit <- 30 # mm
panelMargin <- 1.5

mmToInch <- 0.03937007874
mmToLineUnit <- 1/2.13
mmToLinePlotgarden <- 1/0.75
ptToMM <- 1/2.845


strong_red <- "#CB333A"
strong_blue <- "#4851A0"
weak_red <- lighten(strong_red, amount = 0.4)   # FF7D81
weak_blue <- lighten(strong_blue, amount = 0.4) # 8A91DD
no_grey <- "#A8A8A8"

strong_teel <- "#0892A5"
strong_green <- "#23CE6B" # A485
strong_darkgreen <- "#054A29"
strong_yellow <- "#FFBA49"
strong_orange <- "#F18F01" # dTAG
strong_lightpurple <- "#BD93D8"
strong_purple <- "#9E33CB" # Epi

panelSize <- function(num, unit = panelUnit, margin = panelMargin){
  return(num*unit - 2*margin)
}

FUNCTIONS

importBedpe = function(bedpe){
  a1 = makeGRangesFromDataFrame(data.frame(
    chr = bedpe$V1,
    start = bedpe$V2 +1,
    end = bedpe$V3))
  a2 = makeGRangesFromDataFrame(data.frame(
    chr = bedpe$V4,
    start = bedpe$V5 +1,
    end = bedpe$V6))
  GInteractions(a1, a2)
}

get_density <- function(x, y, ...) {
  dens <- MASS::kde2d(x, y, ...)
  ix <- findInterval(x, dens$x)
  iy <- findInterval(y, dens$y)
  ii <- cbind(ix, iy)
  return(dens$z[ii])
}
label_kb_mb <- function(x) {
  ifelse(x >= 1000000, paste0(x / 1000000, "Mb"), paste0(x / 1000, "kb"))
}

importPeak = function(fileName){
  df = fread(fileName)
  gr = makeGRangesFromDataFrame(data.frame(
    chr = df$V1, start = df$V2, end = df$V3
  ))
}

Comparing G1 vs Async for consensus loops

All loops
diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))

temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
  dplyr::select(id, UT, AID)

score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
  dplyr::mutate(diff_G1 = dTAG - DMSO,
                  diff_async = AID - UT)


### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-1, 0.5) + ylim(-1, 0.5) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()
reg loops
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
  dplyr::mutate(res = V3 - V2,
                id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id

diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))

temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
  dplyr::select(id, UT, AID)

score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
  dplyr::mutate(diff_G1 = dTAG - DMSO,
                  diff_async = AID - UT) %>%
  dplyr::filter(id %in% regID)


### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-1, 0.5) + ylim(-1, 0.5) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
    theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_reg.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_reg.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()


########## Saving Async and G1 specific regulatory loop
diffCutoff <- 0.2
temp <- score.tb %>% dplyr::filter(diff_G1 >= -diffCutoff, diff_async >= -diffCutoff) %>%
  dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothRetained.bedpe"), sep = "\t", col.names = FALSE)

temp <- score.tb %>% dplyr::filter(diff_G1 >= -diffCutoff, diff_async < -diffCutoff) %>%
  dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_AsyncSpecificPert.bedpe"), sep = "\t", col.names = FALSE)

temp <- score.tb %>% dplyr::filter(diff_G1 < -diffCutoff, diff_async >= -diffCutoff) %>%
  dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_G1SpecificPert.bedpe"), sep = "\t", col.names = FALSE)

temp <- score.tb %>% dplyr::filter(diff_G1 < -diffCutoff, diff_async < -diffCutoff) %>%
  dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothPert.bedpe"), sep = "\t", col.names = FALSE)
str loops
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
  dplyr::mutate(res = V3 - V2,
                id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id

diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))

temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
  dplyr::select(id, UT, AID)

score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
  dplyr::mutate(diff_G1 = dTAG - DMSO,
                  diff_async = AID - UT) %>%
  dplyr::filter(id %in% regID)


### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-1, 0.5) + ylim(-1, 0.5) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_str.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_str.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()

Comparing loops called from Async

# Importing loops. For make comparison easier, 25 kb
binSize = 25*1000
temp <- fread(here(refDir, "Tjian_chromosight_loop.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.async <- importBedpe(temp)


binSize = 25*1000
temp <- fread(here(consensusDir, "chromo_cons.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.G1 <- importBedpe(temp)


# Check overlap
overlap <- findOverlaps(loop.async, loop.G1)


n.async <- nrow(as_tibble(loop.async))
n.G1 <- nrow(as_tibble(loop.G1))
n.async.overlap <- length(unique(queryHits(overlap)))
n.G1.overlap <- length(unique(subjectHits(overlap)))

plot(euler(c("Async" = n.async - n.async.overlap,
             "Async&G1" = n.async.overlap,
             "G1" = 1)), quantities = TRUE)


plot(euler(c("Async" =1,
             "Async&G1" = n.G1.overlap,
             "G1" = n.G1 - n.G1.overlap)), quantities = TRUE)




# Importing loops. For make comparison easier, 25 kb
binSize = 25*1000
temp.pp <- fread(here(refDir, "Tjian_chromosight_loop_P-P.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp.pp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
binSize = 25*1000
temp.ep <- fread(here(refDir, "Tjian_chromosight_loop_E-P.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp.ep) <- c("V1", "V2", "V3", "V4", "V5", "V6")
temp <- bind_rows(temp.pp, temp.ep)
loop.async <- importBedpe(temp)


binSize = 25*1000
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.G1 <- importBedpe(temp)


# Check overlap
overlap <- findOverlaps(loop.async, loop.G1)


n.async <- nrow(as_tibble(loop.async))
n.G1 <- nrow(as_tibble(loop.G1))
n.async.overlap <- length(unique(queryHits(overlap)))
n.G1.overlap <- length(unique(subjectHits(overlap)))

plot(euler(c("Async" = n.async - n.async.overlap,
             "Async&G1" = n.async.overlap,
             "G1" = 1)), quantities = TRUE)


plot(euler(c("Async" =0.1,
             "Async&G1" = n.G1.overlap,
             "G1" = n.G1 - n.G1.overlap)), quantities = TRUE)

[2.5] Consensus loop annotation

#### Importing ChIP-exo peaks
refDir <- here("../..", "reference")
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K4me3 <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
peak.Whyte.SE <- importPeak(here(refDir, "superEnhancer_Whyte_ESC_mm10.bed"))
peak.Dylan.SE <- importPeak(here(refDir, "superEnhancer_Dylan_ESC.bed"))

Functions

createLoopAnnotation <- function(bedpe.loop.anno, name, figDir, outDir, colorList){
  temp = bedpe.loop.anno %>%
    dplyr::mutate(sample = name)
  
  num = nrow(temp)
  p7 = ggplot(temp, aes(x = sample, fill = Anno2)) +
    geom_bar(color = "black") +
    theme_bw() +
    labs(title = paste0(num, " loops"),
         x = "", y = "Counts") +
    scale_y_continuous(labels = comma_format()) +
    theme(plot.title = element_text(hjust = 0.5),
          aspect.ratio = 5,
          legend.position = "right",
          legend.direction  = "vertical") +
    scale_fill_manual(values = colorList)
  
  width = 3
  height = 5
  svglite(here(figDir,
               paste0("loopClassify_", name, ".svg")),
          width = width, height = height)
  plot(p7)
  invisible(dev.off())
  png(here(figDir,
           paste0("loopClassify_", name, ".png")),
      width = width, height = height, res = 600, units = "in")
  plot(p7)
  invisible(dev.off())
}


annotateLoopRelaxedTSS <- function(bedpe.anno){
  temp = bedpe.anno %>% dplyr::rowwise() %>%
    dplyr::mutate(
      A1 = if_else((A1_H3K4me3TSS), "P", 
                   if_else((A1_H3K27ac), "E",
                           if_else((A1_CTCF|A1_RAD21), "S", "X"))),
      A2 = if_else((A2_H3K4me3TSS), "P", 
                   if_else((A2_H3K27ac), "E",
                           if_else((A2_CTCF|A2_RAD21), "S", "X")))
    )
  temp = temp %>% dplyr::rowwise() %>%
    dplyr::mutate(Anno = paste0(A1, "-", A2),
                  Anno2 = if_else(Anno == "E-P", "P-E", 
                                  if_else(Anno == "S-P", "P-S",
                                          if_else(Anno == "X-P", "P-X",
                                                  if_else(Anno == "S-E", "E-S",
                                                          if_else(Anno == "X-E", "E-X",
                                                                  if_else(Anno == "X-S", "S-X",
                                                                          Anno)))))),
    )
  
  temp$Anno2 = factor(temp$Anno2, level = c("X-X",
                                            "S-X", "S-S",
                                            "E-X","E-S","E-E",
                                            "P-E","P-X", "P-S", "P-P"))
  
  # Checking the precense of super enhancer
  temp <- temp %>% dplyr::rowwise() %>%
    dplyr::mutate(AnnoSE = ifelse(A1_Whyte.SE | A2_Whyte.SE, "SE", "NO"))
  temp$AnnoSE <- factor(temp$AnnoSE, level = c("SE", "NO"))
  
  return(temp)
}

annotateLoopPromoterTSS <- function(bedpe.anno){
  temp = bedpe.anno %>% dplyr::rowwise() %>%
    dplyr::mutate(
      A1 = if_else((A1_H3K4me3TSS), "P", "N"),
      A2 = if_else((A2_H3K4me3TSS), "P", "N")
    )
  temp = temp %>% dplyr::rowwise() %>%
    dplyr::mutate(Anno = paste0(A1, "-", A2),
                  Anno2 = if_else(Anno == "N-P", "P-N", Anno))
  
  temp$Anno2 = factor(temp$Anno2, level = c("N-N", "P-N", "P-P"))
  
  return(temp)
}

annotateLoopEnhancer <- function(bedpe.anno){
  temp = bedpe.anno %>% dplyr::rowwise() %>%
    dplyr::mutate(
      A1 = if_else((A1_H3K27ac), "E", "N"),
      A2 = if_else((A2_H3K27ac), "E", "N")
    )
  temp = temp %>% dplyr::rowwise() %>%
    dplyr::mutate(Anno = paste0(A1, "-", A2),
                  Anno2 = if_else(Anno == "N-E", "E-N", Anno))
  temp$Anno2 = factor(temp$Anno2, level = c("N-N", "E-N", "E-E"))
  
  return(temp)
}

annotateLoopStructure <- function(bedpe.anno){
  temp = bedpe.anno %>% dplyr::rowwise() %>%
    dplyr::mutate(
      A1 = if_else((A1_CTCF|A1_RAD21), "S", "N"),
      A2 = if_else((A2_CTCF|A2_RAD21), "S", "N")
    )
  temp = temp %>% dplyr::rowwise() %>%
    dplyr::mutate(Anno = paste0(A1, "-", A2),
                  Anno2 = if_else(Anno == "N-S", "S-N", Anno))
  temp$Anno2 = factor(temp$Anno2, level = c("N-N", "S-N", "S-S"))
  return(temp)
}


annotateAnchorTSS <- function(bedpe){
  tb.loop = 
    setOverlapColumn("Whyte.SE",
                     setOverlapColumn("CTCF",
                                      setOverlapColumn("RAD21",
                                                       setOverlapColumn("H3K27ac",
                                                                        setOverlapColumn("H3K4me3TSS", bedpe)))))
  return(tb.loop)
}

setOverlapColumn <- function(peakName, loop){
  tb.loop = as_tibble(loop)
  overlap = returnOverlapIndexLixt(get(paste0("peak.", peakName)), tb.loop)
  tb.loop[[paste0("A1_", peakName)]] = FALSE
  tb.loop[[paste0("A1_", peakName)]][overlap[[1]]] = TRUE
  tb.loop[[paste0("A2_", peakName)]] = FALSE
  tb.loop[[paste0("A2_", peakName)]][overlap[[2]]] = TRUE
  return(tb.loop)
}

returnOverlapIndexLixt <- function(peak, loop){
  anchor1.tb = as_tibble(loop) %>% dplyr::select(chrom1, start1, end1)
  anchor1 = makeGRangesFromDataFrame(data.frame(
    chr = anchor1.tb$chrom1,
    start = anchor1.tb$start1,
    end = anchor1.tb$end1
  ))
  
  anchor2.tb = as_tibble(loop) %>% dplyr::select(chrom2, start2, end2)
  anchor2 = makeGRangesFromDataFrame(data.frame(
    chr = anchor2.tb$chrom2,
    start = anchor2.tb$start2,
    end = anchor2.tb$end2
  ))
  
  overlap = list(overlap1 = unique(queryHits(findOverlaps(anchor1, peak))),
                 overlap2 = unique(queryHits(findOverlaps(anchor2, peak))))
  return(overlap)
}

saveAnnoGroupBedpe <- function(temp, anno.list, name, annoName, ouDir){
  loop = temp %>% dplyr::filter(Anno2 %in% anno.list) %>%
    dplyr::select(c("chrom1", "start1", "end1", "chrom2", "start2", "end2"))
  fwrite(loop, here(outDir, paste0(name, "_", annoName, ".bedpe")), sep = "\t", col.names = FALSE)
}

Annotation

consensus.loop.tb <- fread(here(consensusDir, "chromo_cons_score.tsv"))

################################################################################
# Filtering H3K4me3 peaks that has TSS nearby
# Since the finest resolution is 5kb, +-2.5kb will be used as a cutoff for checking TSS presence
flankSize <- 2500
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend)
colnames(gene.tb) <- c("chr", "start", "end")
TSS1kb.gr <- makeGRangesFromDataFrame(gene.tb)
temp <- peak.H3K4me3[unique(queryHits(findOverlaps(peak.H3K4me3, TSS1kb.gr)))]
fwrite(as_tibble(temp), here(refDir, 
                             paste0("33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.", 
                                    flankSize/1000, "kbTSS.bed")), sep = "\t", col.names = FALSE)
peak.H3K4me3TSS <- importPeak(here(refDir, paste0("33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.", 
                                    flankSize/1000, "kbTSS.bed")))
################################################################################
# Annotating with strict priority (P-TSS > E > S)
temp.anno.TSS <- annotateAnchorTSS(consensus.loop.tb) %>%
  dplyr::mutate(
    diff_dTAG_DMSO = (dTAG-DMSO),
    diff_A485_DMSO = (A485-DMSO))

name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- annotateLoopRelaxedTSS(temp.anno.TSS)
fwrite(consensus.loop.anno.tb, here(consensusDir, paste0(name, ".tsv")), 
       sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.tb, name, figDir, outDir,  colorListLoop)
saveAnnoGroupBedpe(consensus.loop.anno.tb, unique(consensus.loop.anno.tb$Anno2) , name, "all", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"), name, "regulatory", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("S-S", "S-X"), name, "structure", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("X-X"), name, "x-x", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "P-S", "P-X"), name, "p-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "E-E"), name, "pe-pe", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E"), name, "p-pe", outDir)

saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P"), name, "p-p", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-E"), name, "p-e", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-S"), name, "p-s", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-X"), name, "p-x", outDir)

################################################################################
# Annotating with one marker (Promoter)
name <- "chromo_cons_annoPromoter"
consensus.loop.anno.promoter.tb <- annotateLoopPromoterTSS(temp.anno.TSS)
fwrite(consensus.loop.anno.promoter.tb, here(consensusDir, paste0(name, ".tsv")), 
       sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.promoter.tb, name, 
                     figDir, outDir,  colorListPromoter)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, unique(consensus.loop.anno.promoter.tb$Anno2) , name, "all", outDir)  
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("P-P"), name, "p-p", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("P-N"), name, "p-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("N-N"), name, "n-n", outDir)

################################################################################
# Annotating with one marker (Enhancer)
name <- "chromo_cons_annoEnhancer"
consensus.loop.anno.enhancer.tb <- annotateLoopEnhancer(temp.anno.TSS)
fwrite(consensus.loop.anno.enhancer.tb, here(consensusDir, paste0(name, ".tsv")), 
       sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.enhancer.tb, name, 
                     figDir, outDir,  colorListEnhancer)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, unique(consensus.loop.anno.enhancer.tb$Anno2) , name, "all", outDir)  
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("E-E"), name, "e-e", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("E-N"), name, "e-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("N-N"), name, "n-n", outDir)
################################################################################
# Annotating with one marker (Structure)
name <- "chromo_cons_annoStructure"
consensus.loop.anno.structure.tb <- annotateLoopStructure(temp.anno.TSS)
fwrite(consensus.loop.anno.structure.tb, here(consensusDir, paste0(name, ".tsv")), 
       sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.structure.tb, name, 
                     figDir, outDir,  colorListStructure)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, unique(consensus.loop.anno.structure.tb$Anno2) , name, "all", outDir)  
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("S-S"), name, "s-s", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("S-N"), name, "s-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("N-N"), name, "n-n", outDir)
Pure-reg vs Str-reg
################################################################################
# 2024.09.10 Splitting regulatory loop into pure regulatory and structure-related loops
name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- fread(here(consensusDir, paste0(name, ".tsv")))
regulatory.loop.anno.tb <- consensus.loop.anno.tb %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))

regulatory.loop.anno.tb <- regulatory.loop.anno.tb %>% rowwise() %>%
  dplyr::mutate(
    Anno3 = ifelse(A1_RAD21 | A1_CTCF | A2_RAD21 | A2_RAD21, paste0("str"),
                   paste0("reg"))
  )


fwrite(regulatory.loop.anno.tb, here(consensusDir, paste0(name, "_Anno3.tsv")), 
       sep = "\t", col.names = TRUE)

regulatory.loop.anno.tb_reg <- regulatory.loop.anno.tb %>% dplyr::filter(Anno3 == "reg")
regulatory.loop.anno.tb_str <- regulatory.loop.anno.tb %>% dplyr::filter(Anno3 == "str")

saveAnnoGroupBedpe(regulatory.loop.anno.tb_reg, c("P-P", "P-E", "E-E"), name, "pe-pe_reg", outDir)
saveAnnoGroupBedpe(regulatory.loop.anno.tb_str, c("P-P", "P-E", "E-E"), name, "pe-pe_str", outDir)

[2.6] Comparing across samples for conesnsus loop

Functions

create_loop_dis_vs_score <- function(data, figDir, name, Anno2List){
  data = data %>% dplyr::filter(Anno2 %in% Anno2List)
  
  ### barplot
  temp <- data %>% dplyr::select(id, DMSO, dTAG, A485) %>%
    pivot_longer(!id, names_to = "treatment", values_to = "score")
  temp$treatment <- factor(temp$treatment, levels = c("DMSO", "dTAG", "A485"))
  
  p3 <- ggplot(temp, aes(x = treatment, y = score)) +
    geom_violin(aes(fill = treatment), show.legend = FALSE) +
    scale_fill_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    ylim(-0.5, 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
    theme_classic() +
    ggtitle(name) +
    theme(plot.title = element_text(size = 4))
  fileName <- paste0("score_barplot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 2, height = 4)
  print(p3)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 2, height = 4)
  print(p3)
  dev.off()
  
  ### Distance vs score
  
  temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, dTAG, A485)

  avg_scores <- temp %>%
    group_by(distance) %>%
    summarise(across(starts_with("DMSO"):starts_with("A485"), mean, na.rm = TRUE))

  avg_scores_long <- avg_scores %>%
    pivot_longer(cols = DMSO:A485, names_to = "condition", values_to = "avg_score")
  avg_scores_long$condition <- factor(avg_scores_long$condition, levels = c("DMSO", "dTAG", "A485"))
  # Create the plot
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
    geom_smooth(show.legend = TRUE) + ylim(0, 0.5) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
    scale_fill_manual(values = c("DMSO" = "grey80", "dTAG" = "pink", "A485" = "skyblue")) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Score") +
    theme(plot.title = element_text(size = 5))
  fileName <- paste0("dist_vs_score_linePlot_", name)
  png(here(figDir, paste0(fileName, ".png")),
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),
      width = 4, height = 3)
  print(p4)
  dev.off()
}

create_loop_dis_vs_diffscore <- function(data, figDir, name, Anno2List){
  data = data %>% dplyr::filter(Anno2 %in% Anno2List)
  
  
  temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, diff_A485_DMSO)
  
  avg_scores <- temp %>%
    group_by(distance) %>%
    summarise(across(1:2, mean, na.rm = TRUE))
  
  avg_scores_long <- avg_scores %>%
    pivot_longer(cols = 2:3, names_to = "condition", values_to = "avg_score")
  avg_scores_long$condition <- factor(avg_scores_long$condition, 
                                      levels = c("diff_dTAG_DMSO", "diff_A485_DMSO"))
  # Create the plot
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
    geom_smooth(show.legend = TRUE) + geom_hline(yintercept = 0) + ylim(-0.5, 0.1) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = c("diff_dTAG_DMSO" = "pink", "diff_A485_DMSO" = "skyblue")) +
    scale_fill_manual(values = c("diff_dTAG_DMSO" = "pink", "diff_A485_DMSO" = "skyblue")) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Diff Score") +
    theme(plot.title = element_text(size = 5))
  fileName <- paste0("dist_vs_score_difflinePlot_", name)
  png(here(figDir, paste0(fileName, ".png")),
      res = 600, units = "in", width = 4.5, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),
          width = 4.5, height = 3)
  print(p4)
  dev.off()
}

create_loop_scatterplot <- function(data, figDir, name, Anno2List, diffCutoff){
  data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% Anno2List)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
  num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
  num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
  num.all <- num.up + num.no + num.down
  perc.up <- round(num.up / num.all * 100, 2)
  perc.no <- round(num.no / num.all * 100, 2)
  perc.down <- round(num.down / num.all * 100, 2)
  
  ### Scatterplot
  data$density <- get_density(data$DMSO, data$dTAG, n = 100)
  data <- data %>% dplyr::arrange(density)
  correlation <- cor(data$DMSO, data$dTAG)
  p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
    geom_point(size = 1,
      alpha = 1,
      stroke = 0) + 
    scale_color_viridis(option = "D", guide = guide_colorbar(
                          barwidth = 1.5/5.08,  # Adjust width of the color bar
                          barheight = 15/5.08   # Adjust height of the color bar
                        )) +
    xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
             color = "black", hjust = 0, , size = 1,
        family = fontType) +
    annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
             color = "black", hjust = 0, , size = 1,
        family = fontType) +
    annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
             color = "black", hjust = 0, , size = 1,
        family = fontType) +
    theme_classic() +
    #annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black") +
    theme(
      # legend.position = "none",
      plot.title = element_text(
        hjust = 0.5,
        size = fontSizeS,
        family = fontType
      ),
      axis.title = element_text(
        size = fontSizeS,
        family = fontType,
        color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS,
        family = fontType,
        color = "#000000"
      ),
      axis.line = element_line(
        color = "#000000",
        size = lineThick*mmToLineUnit,
        lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000",
        size = lineThick*mmToLineUnit,
        lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
    ) +
  xlab("Loop score\nG1.DMSO") +
    ylab("Loop score\nG1.dTAG")
  
  
  
  num.up <- (summary(data$updown_A485_DMSO))["UP"]
  num.no <- (summary(data$updown_A485_DMSO))["NO"]
  num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
  num.all <- num.up + num.no + num.down
  perc.up <- round(num.up / num.all * 100, 2)
  perc.no <- round(num.no / num.all * 100, 2)
  perc.down <- round(num.down / num.all * 100, 2)
  
  data$density <- get_density(data$DMSO, data$A485, n = 100)
  data <- data %>% dplyr::arrange(density)
  correlation <- cor(data$DMSO, data$A485)
  p2 <- ggplot(data, aes(x = DMSO, y = A485, color = density)) +
    geom_point(size = 1,
      alpha = 1,
      stroke = 0) + 
    scale_color_viridis(option = "D", guide = guide_colorbar(
                          barwidth = 1.5/5.08,  # Adjust width of the color bar
                          barheight = 15/5.08   # Adjust height of the color bar
                        )) +
    xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
             color = "black", hjust = 0, size = 1,
        family = fontType) +
    annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
             color = "black", hjust = 0, size = 1,
        family = fontType) +
    annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
             color = "black", hjust = 0, size = 1,
        family = fontType) +
    theme_classic() + 
    # annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black") +
    theme(
      # legend.position = "none",
      plot.title = element_text(
        hjust = 0.5,
        size = fontSizeS,
        family = fontType
      ),
      axis.title = element_text(
        size = fontSizeS,
        family = fontType,
        color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS,
        family = fontType,
        color = "#000000"
      ),
      axis.line = element_line(
        color = "#000000",
        size = lineThick*mmToLineUnit,
        lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000",
        size = lineThick*mmToLineUnit,
        lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
    ) +
    xlab("Loop score\nG1.DMSO") +
    ylab("Loop score\nG1.A485")

    width <- panelSize(1.5)*mmToInch
  height <- panelSize(1.5)*mmToInch  
  
  fileName <- here(figDir,paste0("scatterplot_", name, "_dTAG_vs_DMSO_", diffCutoff))

  svglite(paste0(fileName, ".svg"), width = width, height =height)
  print(p1)
  dev.off()
  png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
  print(p1)
  dev.off()
  
    fileName <- here(figDir, paste0("scatterplot_", name, "_A485_vs_DMSO_", diffCutoff))

  svglite(paste0(fileName, ".svg"), width = width, height =height)
  print(p2)
  dev.off()
  png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
  print(p2)
  dev.off()
  
  # 
  # 
  # 
  # png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
  # print(cowplot::plot_grid(p1, p2, align = "h"))
  # dev.off()
  # 
  # svglite(here(figDir, paste0(fileName, ".svg")), 
  #         width = 5*1.5, height = 2.5*1.5)
  # print(cowplot::plot_grid(p1, p2, align = "h"))
  # dev.off()
}

make_diff_bedpe <- function(data, name, Anno2List, outDir, diffCutoff){
  data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% Anno2List)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_UP_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_NO_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_DOWN_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_UP_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_NO_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_DOWN_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
}

create_dist_vs_avgScore_perTreatment <- function(data, figDir, name, loopList, colorList, se = FALSE){
  avg_scores_long <- data %>%
    group_by(distance, Anno2) %>%
    summarise(avg_score = mean(score, na.rm = TRUE)) %>%
    ungroup() 
  avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) + 
    geom_smooth(show.legend = TRUE, se = se)  +
    ylim(0, 0.5) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = colorList) +
    scale_fill_manual(values = colorList) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Score") +
    theme(plot.title = element_text(size = 8))
  
  fileName <- paste0("dist_vs_score_linePlot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 4, height = 3)
  print(p4)
  dev.off()
} 

create_dist_vs_avgDiffScore_perTreatment <- function(data, figDir, name, loopList, colorList, se = FALSE){
  avg_scores_long <- data %>%
    group_by(distance, Anno2) %>%
    summarise(avg_score = mean(score, na.rm = TRUE)) %>%
    ungroup() 
  avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) + 
    geom_hline(yintercept = 0) +
    geom_smooth(show.legend = TRUE, se = se)  +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = colorList) +
    scale_fill_manual(values = colorList) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Diff Score") +
    theme(plot.title = element_text(size = 8)) 
  
  fileName <- paste0("dist_vs_score_difflinePlot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 4, height = 3)
  print(p4)
  dev.off()
} 

create_score_barplot_perTreatment <- function(data, figDir, name, loopList, colorList){
  data$Anno2 <- factor(data$Anno2, levels = loopList)
  
  p3 <- ggplot(data, aes(x = Anno2, y = score)) +
    geom_violin(aes(fill = Anno2), show.legend = FALSE) +
    scale_fill_manual(values = colorList) +
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    ylim(-0.5, 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
    theme_classic() +
    ggtitle(name) +
    theme(plot.title = element_text(size = 4))
  
  fileName <- paste0("score_barPlot_", name)
  
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 2, height = 4)
  print(p3)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 2, height = 4)
  print(p3)
}


iterate_loop_functions <- function(data, figDir, groupName, annoList){
  # create_loop_dis_vs_score(data, figDir, groupName, annoList)
  # create_loop_dis_vs_diffscore(data, figDir, groupName, annoList)

  create_loop_scatterplot(data, figDir, groupName, annoList, 0.2)
  # create_loop_scatterplot(data, figDir, groupName, annoList, 0.1)
  # make_diff_bedpe(data, groupName, annoList, consensusDir, 0.2)
  # make_diff_bedpe(data, groupName, annoList, consensusDir, 0.1)
}

Relaxed annotation


name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

iterate_loop_functions(data, figDir, paste0(name, "_", "cl_all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_structure"), 
                       c("S-S", "S-X"))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-s"), 
                       c("S-S"))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_regulatory"), 
                       c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_pe-pe"), 
                       c("P-P", "P-E", "E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-pe"), 
                       c("P-P", "P-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-p"), 
                       c("P-P"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-e"), 
                       c("P-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-e"), 
                       c("E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "x-x"), 
                       c("X-X"))
###########
# Creating differential scatterplot
View(data)



#######################
# Creating figures per each condition
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)

#######################
# Creating figures per each condition, differential
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG_SE"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop, se = TRUE)
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)

create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485_SE"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop, se = TRUE)
Pure-reg vs str-reg
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, "_Anno3.tsv")))
diffCutoff <- 0.2

# Splitting data
data.reg <- data %>% dplyr::filter(Anno3 == "reg")
data.str <- data %>% dplyr::filter(Anno3 == "str")

# Taking codes from previous function (str)
data <- data.str
data <- data %>%
  dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))

num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)

data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
           color = "black", hjust = 0, size = 3) +
  theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))

# Taking codes from previous function (reg)
data <- data.reg
data <- data %>%
  dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))

num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)

data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p2 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
           color = "black", hjust = 0, size = 3) +
  theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))  
  

fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_pure_str_reg")
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()

svglite(here(figDir, paste0(fileName, ".svg")), 
        width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
50kb vs 200 kb
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, "_Anno3.tsv")))

data <- data %>% dplyr::mutate(size = start2 - start1)

data_under50kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size < 50*1000, Anno3 == "reg")
data_under200kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size >= 50*1000, size < 200*1000, Anno3 == "reg")
data_over200kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size >= 200*1000, Anno3 == "reg")

########

data <- data_under50kb

diffCutoff <- 0.2
data <- data %>%
  dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))

####
num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)

data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
           color = "black", hjust = 0, size = 3) +
  theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))  
  

####
num.up <- (summary(data$updown_A485_DMSO))["UP"]
num.no <- (summary(data$updown_A485_DMSO))["NO"]
num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)

data$density <- get_density(data$DMSO, data$A485, n = 100)
data <- data %>% dplyr::arrange(density)
p2 <- ggplot(data, aes(x = DMSO, y = A485, color = density)) +
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
           color = "black", hjust = 0, size = 3) +
  theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))  
  

fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_regunder50kb_pure")
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()

svglite(here(figDir, paste0(fileName, ".svg")), 
        width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()

One feature annotation - promoter


name <- "chromo_cons_annoPromoter"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-p"), c("P-P"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-n"), c("P-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))

colorList <- colorListPromoter
loopList <- c("N-N", "P-N", "P-P")

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)

create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                      loopList, colorList)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)

One feature annotation - enhancer

name <- "chromo_cons_annoEnhancer"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-e"), c("E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-n"), c("E-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))

colorList <- colorListEnhancer
loopList <- c("N-N", "E-N", "E-E")

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                      loopList, colorList)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)

One feature annotation - structure

name <- "chromo_cons_annoStructure"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-s"), c("S-S"))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-n"), c("S-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))

colorList <- colorListStructure
loopList <- c("N-N", "S-N", "S-S")

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                      loopList, colorList)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)

[2.7] Distance vs loop # and score

data <- fread(here(consensusDir, "chromo_cons_score.tsv")) %>%
  dplyr::mutate(distance = start2 - start1,
                res = end1 - start1)

p1 <- ggplot(data, aes(x = distance, fill = factor(res))) +
  geom_histogram(binwidth = 50000, alpha = 1) +
  labs(title = "Distribution of Distance by Resolution",
       x = "Distance",
       y = "Counts") + 
  facet_wrap(~ res, ncol = 1, scales = "free") + 
  scale_x_continuous(labels = label_kb_mb, limits = c(0, 5e6)) + 
  theme_classic() +
  theme(legend.position = "none",
        plot.title = element_text(size = 8))  # Removes the legend

png(here(figDir, paste0("consensus_dist_per_res.png")), 
    res = 600, units = "in", width = 3, height = 6)
print(p1)
dev.off()

p2 <- ggplot(data, aes(x = distance, fill = factor(res))) +
  geom_histogram(binwidth = 50000, alpha = 1) +
  labs(title = "Distribution of Distance by Resolution",
       x = "Distance",
       y = "Counts") + 
  facet_wrap(~ res, ncol = 1, scales = "free") + 
  scale_x_continuous(labels = label_kb_mb, limits = c(0, 2e6)) + 
  theme_classic() +
  theme(legend.position = "none",
        plot.title = element_text(size = 8))  # Removes the legend


png(here(figDir, paste0("consensus_dist_per_res_2mb.png")), 
    res = 600, units = "in", width = 3, height = 6)
print(p2)
dev.off()
### Distance vs score
  
  temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, dTAG, A485)
  
  avg_scores <- temp %>%
    group_by(distance) %>%
    summarise(across(starts_with("DMSO"):starts_with("A485"), mean, na.rm = TRUE))
  
  avg_scores_long <- avg_scores %>%
    pivot_longer(cols = DMSO:A485, names_to = "condition", values_to = "avg_score")
  avg_scores_long$condition <- factor(avg_scores_long$condition, levels = c("DMSO", "dTAG", "A485"))
  # Create the plot
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
    geom_smooth(show.legend = TRUE) + ylim(0, 0.5) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
    scale_fill_manual(values = c("DMSO" = "grey80", "dTAG" = "pink", "A485" = "skyblue")) +
    labs(title = paste0("Distance vs. Average Score, ", note),
         x = "Distance",
         y = "Average Score") +
    theme(plot.title = element_text(size = 12))
  png(here(figDir, paste0("consensus_dist_vs_avgScore_allRes_pu100pz100_", note, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()

[2.8] Annotating genes to P-P and P-E loops

[2.9] Size distribution of diff loops

create_dist_barplot <- function(data, figDir, name, note, loopList, diffCutoff){
  temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
    dplyr::filter(Anno2 %in% loopList,
                  updown_dTAG_DMSO %in% c("UP", "NO", "DOWN"))
  p <- ggplot(temp, aes(x = updown_dTAG_DMSO, y = distance)) +
    geom_violin(aes(fill = updown_dTAG_DMSO)) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = updown_dTAG_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + ggtitle(note) +
    scale_y_continuous(labels = label_kb_mb)
  fileName <- paste0("size_barplot_", name, "_dTAG_vs_DMSO_", note, "_", diffCutoff)
  height <- 3
  width <- 4
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
  
  
  temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
    dplyr::filter(Anno2 %in% loopList,
                  updown_A485_DMSO %in% c("UP", "NO",  "DOWN"))
  p <- ggplot(temp, aes(x = updown_A485_DMSO, y = distance)) +
    geom_violin(aes(fill = updown_A485_DMSO)) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = updown_A485_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + ggtitle(note) +
    scale_y_continuous(labels = label_kb_mb)
  fileName <- paste0("size_barplot_", name, "_A485_vs_DMSO_", note, "_", diffCutoff)
  height <- 3
  width <- 4
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}


name
diffCutoff <- 0.2
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
  dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")),
                distance = start2 - start1)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
create_dist_barplot(data, figDir, name, "p-n", c("P-P", "P-E", "P-S", "P-X"), 0.2)
create_dist_barplot(data, figDir, name, "p-pe", c("P-P", "P-E"), 0.2)
create_dist_barplot(data, figDir, name, "pe-pe", c("P-P", "P-E", "E-E"), 0.2)
create_dist_barplot(data, figDir, name, "str", c("S-S", "S-X"), 0.2)

[2.10] Linking with RNA-seq/PRO-seq

Function & parameter

GOdir <- here("../..", "result", "loop", "GO")
dir.create(GOdir, showWarnings = FALSE, recursive = TRUE)

getGO <- function(name, figDir, geneList, categoryNum = 15, height = 10, width = 7){
  GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
  GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
  fwrite(as.data.frame(GO), here(GOdir, paste0("GO_", name, "_ensembl.tsv")), sep = "\t")
  fwrite(as.data.frame(GO.readable), here(GOdir, paste0("GO_", name, "_readable.tsv")), sep = "\t")
  
  if(nrow(as.data.frame(GO)) != 0){
    #####
    fileName <- paste0("GO_", name)
    svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
    print(dotplot(GO, showCategory = categoryNum, title = name) + 
            scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
    dev.off()
    png(here(figDir, paste0(fileName, ".png")), height = height, width = width, res = 600, unit = "in")
    print(dotplot(GO, showCategory = categoryNum, title = name) + 
            scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
    dev.off()
  } 
}

convPvalue <- function(pvalue){
  out <- ifelse(pvalue < 0.0001, "****",
                ifelse(pvalue < 0.001, "***",
                       ifelse(pvalue < 0.01, "**",
                              ifelse(pvalue < 0.05, "*", "ns"))))
  return(out)
}

loadLoopAnnoData <- function(fileName, diffCutoff = 0.2, annoList = c("P-P", "P-E", "P-S", "P-X")){
  data <- fread(fileName) %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")),
                  gene = strsplit(gene, '\\|')) %>%
    dplyr::filter(Anno2 %in%  annoList)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  return(data)
}

dTAG Scatterplot

loopType <- "p-n"

# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
  dplyr::mutate(distance = start2 - start1)
# geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
#                                       diffCutoff = diffCutoff,
#                                       annoList = c("P-P"))%>% 
#   dplyr::mutate(distance = start2 - start1)
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

# 
# p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
#                       label = ifelse(flag != "0NO", external_gene_name, NA),
#                       shape = as.factor(maxFlag))) +
#   geom_point() + geom_text_repel() + theme_classic() + 
#   ggtitle("dTAG") +
#   geom_hline(yintercept = 0) +
#   geom_hline(yintercept = diffCutoff, linetype = "dashed") +
#   geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
#   geom_vline(xintercept = 0) +
#   geom_vline(xintercept = fcCutoff, linetype = "dashed") +
#   geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
#   scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
#   scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
# 
# fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff)
# height <- 4
# width <- 7
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

# fwrite(temp, here(consensusDir, paste0("gene_avgScore_fc_dTAG_", loopType, "_noFCcutoff.tsv")), sep = "\t")

p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point(size = 1, alpha = 1,
      stroke = 0) + geom_text_repel() + theme_classic() + 
  guides(color = "none", shape = "none") + 
  geom_hline(yintercept = 0, alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  geom_hline(yintercept = c(diffCutoff, -diffCutoff), linetype = "dashed", alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  geom_vline(xintercept = 0, alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  geom_vline(xintercept = c(fcCutoff, -fcCutoff), linetype = "dashed", alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  theme(
    axis.title.x = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) + labs(y = "Avg. Δ loop score", x = "log2(shrunken FC)") +
  scale_color_manual(values = c("0NO" = "#A9A8A9", "1UP" = "#CB333A", "2DOWN" = "#4852A0")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_noFCcutoff")
height <- 42*mmToInch
width <- 50*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - up and down
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.updown.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_noFCcutoffupdown")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# 
# ## RNA + PRO-seq
# temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>% 
#   unnest(gene) %>% group_by(gene) %>%
#   summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')
# 
# temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
#   dplyr::mutate(flag = ifelse(gene %in% geneList.down, "2DOWN",
#                               ifelse(gene %in% geneList.up, "1UP", "0NO")),
#                 maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
#                 shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
#   dplyr::arrange(flag)
# 
# fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_", loopType, ".tsv"), sep = "\t")
# 
# p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
#                       label = ifelse(flag != "0NO", external_gene_name, NA),
#                       shape = as.factor(maxFlag))) +
#   geom_point() + geom_text_repel() + theme_classic() + 
#   ggtitle("dTAG") +
#   geom_hline(yintercept = 0) +
#   geom_hline(yintercept = diffCutoff, linetype = "dashed") +
#   geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
#   geom_vline(xintercept = 0) +
#   geom_vline(xintercept = fcCutoff, linetype = "dashed") +
#   geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
#   scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
#   scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
# 
# fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_RNA-PRO")
# height <- 4
# width <- 7
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()

dTAG Scatterplot OE

Avg

loopType <- "p-pe"

# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_", loopType, "_avgLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_", loopType, "_avgLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
Sum
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = sum(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_sumLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = sum(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_sumLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
abs max
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO[which.max(abs(log_obsexp_diff_dTAG_DMSO))], .groups = 'drop')


temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_absLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO[which.max(abs(log_obsexp_diff_dTAG_DMSO))], .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_absLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
Closest P-N loop
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>% 
  unnest(gene) %>% group_by(gene) %>% 
  slice_min(distance, with_ties = FALSE) %>% 
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_closestLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>% 
  unnest(gene) %>% group_by(gene) %>%
  slice_min(distance, with_ties = FALSE) %>% 
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_closestLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
Farthest P-N loop
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>% 
  unnest(gene) %>% group_by(gene) %>% 
  slice_max(distance, with_ties = FALSE) %>% 
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_farthestLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>% 
  unnest(gene) %>% group_by(gene) %>%
  slice_max(distance, with_ties = FALSE) %>% 
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_farthestLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

A485 Scatterplot

P-N
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)

# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_sumScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
P-PE
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)

# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E"))%>% 
  dplyr::mutate(distance = start2 - start1)

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-pe.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-pe.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_sumScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-pe.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
P-S
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)

# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-S"))%>% 
  dplyr::mutate(distance = start2 - start1)

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-s.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-s.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_sumScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-s.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

[2.11] How many loops per gene?

# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))


# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>%  summarize(
    peak = list(peakID),
    anno2 = list(Anno2),
    count = n())

ggplot(tempSum, aes(x = count)) + geom_histogram(binwidth = 1) + theme_classic() + 
  ggtitle("# of P-N loops for each gene") + scale_y_log10()

###################################################################################

[2.11.2] SuperEnhancer?

# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))

tempSE <- geneAnnoData %>% unnest(gene) %>% group_by(gene) %>% summarize(SE = ifelse(any(AnnoSE == "SE"), 1, 0))


group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


tempSE <- tempSE %>% rowwise() %>% dplyr::mutate(
  group = ifelse(gene %in% group1, "group1",
                 ifelse(gene %in% group2, "group2",
                        ifelse(gene %in% group5, "group5",
                               ifelse(gene %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))

perc.group1 <- nrow(tempSE %>% dplyr::filter(group == "group1", SE == 1))/
  nrow(tempSE %>% dplyr::filter(group == "group1"))

perc.group2 <- nrow(tempSE %>% dplyr::filter(group == "group2", SE == 1))/
  nrow(tempSE %>% dplyr::filter(group == "group2"))

perc.group5 <- nrow(tempSE %>% dplyr::filter(group == "group5", SE == 1))/
  nrow(tempSE %>% dplyr::filter(group == "group5"))

perc.group8 <- nrow(tempSE %>% dplyr::filter(group == "group8", SE == 1))/
  nrow(tempSE %>% dplyr::filter(group == "group8"))

tempPlot <- tibble(group = c("group1", "group2", "group5", "group8"),
                   perc = c(perc.group1, perc.group2, perc.group5, perc.group8))

ggplot(tempPlot, aes(x = group, y = perc)) + geom_point() + theme_classic() + ylim(0, 0.2)
plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO")

[2.12] How many loops/distance per gene in each quadrant?

dTAG with binary grouping

Splitting genes to group
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))


## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


temp <- temp %>%
  dplyr::mutate(group = ifelse(padj < 0.05 & shrinked_log2FC < 0, 1, 
                               ifelse(mean_diff_score < 0, 2, 3)))



fcCutoff <- 0.5
diffCutoff <- 0.2

gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"), sep = "\t")


# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2", NA))
)
- GO for each group

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE)
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE)
GO1.df <- as.data.frame(GO1)
GO2.df <- as.data.frame(GO2)
# fwrite(GO1.df, here("GO_binary_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_binary_group2.tsv"), sep = "\t")
GO1.df <- fread(here("GO_binary_group1.tsv"))
GO2.df <- fread(here("GO_binary_group2.tsv"))

subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
  dplyr::arrange(p.adjust)
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))

subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
  dplyr::arrange(p.adjust)
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))


GOlist <- factor(c("GO:0001824", "GO:0030879", "GO:0021953", "GO:0019827", 
                 "GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055", 
                 "GO:0006397", "GO:0030900", "GO:0008380"))

data <- bind_rows(subset1, subset2) %>%
  dplyr::filter(ID %in% GOlist)

descOrder <- sort(unique(data$Description))[c(4, 10, 2, 1, 3, 11, 7, 8, 6, 5, 9)]
pValueLogMax <- 10
data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) + 
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM) + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      # limits = c(0, 1),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL) +
  theme_bw() +  # Apply theme_bw first, so custom theme settings come after
  theme(
    panel.background = element_rect(fill = "transparent"),  # Override theme_bw panel
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      size = fontSizeS,         # Ensure size is set for x-axis text
      family = fontType,
      color = "#000000",
    ),
    axis.text.y = element_text(
      size = fontSizeS,         # Ensure size is set for y-axis text
      family = fontType,
      color = "#000000",
      lineheight = 0.9          # Allows wrapping for y-axis labels to fit into 2 lines
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )
  


fileName <- here(figDir, "..", "GO", "GO_groups_binaryGrouping")
width <- panelSize(2.5)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
- Average loop size
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)

temp <- temp %>% dplyr::filter(group %in% c(1, 2))


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
  distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv12 <- round(getPvalWilcox(temp, 1, 2), 5)



p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")+
  annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12),
           color = "black", hjust = 0, size = 3)
  
  
fileName <- paste0("size_barplot_diffGroup_dTAG_vs_DMSO_binaryGroup")
height <- 3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-N
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>%  summarize(
    peak = list(peakID),
    anno2 = list(Anno2),
    count = n())

tempSum <- tempSum %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2", NA))
) %>% dplyr::filter(!is.na(group)) %>% dplyr::filter(group %in% c("group1", "group2"))

# 
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$count
  distance2 <- (data %>% dplyr::filter(group ==group2) )$count
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)


p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
                 linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  labs(x = NULL, y = "# of P-N loops per gene") +
  coord_cartesian(ylim = c(0, 8)) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  )+
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
           color = "black", hjust = 0, size = 1) +
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Counting loop types
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
                                 num_pp = sum(anno2 == "P-P"),
                                 num_pe = sum(anno2 == "P-E"),
                                 num_ps = sum(anno2 == "P-S"),
                                 num_px = sum(anno2 == "P-X"),
                                 ratio_reg = (num_pp + num_pe)/total,
                                 ratio_str = num_ps/total)

#saveRDS(temp2, here(resultDir, "gene_loop_link_A485.rds"))


loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
                                        num_pe = sum(num_pe),
                                        num_ps = sum(num_ps),
                                        num_px = sum(num_px))


loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")

loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))

# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()
- Loop number per gene: P-P
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)


p <- ggplot(data, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
                 linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  labs(x = NULL, y = "# of P-P loops per gene") +
  coord_cartesian(ylim = c(0, 4)) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  )+
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
           color = "black", hjust = 0, size = 1) +
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )

fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pp_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-E
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)


p <- ggplot(data, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
                 linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  labs(x = NULL, y = "# of P-E loops per gene") +
  coord_cartesian(ylim = c(0, 4)) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  )+
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
           color = "black", hjust = 0, size = 1) +
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )



fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pe_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-S
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)


p <- ggplot(data, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
                 linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  labs(x = NULL, y = "# of P-S loops per gene") +
  coord_cartesian(ylim = c(0, 4)) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  )+
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
           color = "black", hjust = 0, size = 1) +
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )



fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_ps_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

dTAG

- Splitting genes to groups
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


fcCutoff <- 0.5
diffCutoff <- 0.2
# 
temp <- temp %>%
  dplyr::mutate(group = ifelse(mean_diff_score < -diffCutoff,
                               ifelse(shrinked_log2FC < -fcCutoff, 1,
                                      ifelse(shrinked_log2FC <fcCutoff, 2, 3)),
                               ifelse(mean_diff_score < diffCutoff,
                                      ifelse(shrinked_log2FC < -fcCutoff, 4,
                                             ifelse(shrinked_log2FC < fcCutoff, 5, 6)),
                                      ifelse(shrinked_log2FC < -fcCutoff, 7,
                                             ifelse(shrinked_log2FC < fcCutoff, 8, 9)))))


gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
gene.group3 <- (temp %>% dplyr::filter(group == 3))$gene
gene.group4 <- (temp %>% dplyr::filter(group == 4))$gene
gene.group5 <- (temp %>% dplyr::filter(group == 5))$gene
gene.group6 <- (temp %>% dplyr::filter(group == 6))$gene
gene.group7 <- (temp %>% dplyr::filter(group == 7))$gene
gene.group8 <- (temp %>% dplyr::filter(group == 8))$gene
gene.group9 <- (temp %>% dplyr::filter(group == 9))$gene

fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 3)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group3.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 4)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group4.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 5)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 6)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group6.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 7)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group7.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 8)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 9)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group9.tsv"), sep = "\t")


# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
)
- GO for each group
GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO5 <- enrichGO(gene = gene.group5, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO8 <- enrichGO(gene = gene.group8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")

GO1.df <- as.data.frame(GO1)
GO2.df <- as.data.frame(GO2)
GO3.df <- as.data.frame(GO5)
GO4.df <- as.data.frame(GO8)

subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "group1") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset2 <- GO2.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group2") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset3 <- GO3.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group3") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))

GOlist <- factor(c("GO:0033002", "GO:0070373", "GO:0048730", "GO:0031103",
                 "GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055", 
                 "GO:0006397", "GO:0030900", "GO:0008380"))

data <- bind_rows(bind_rows(subset1, subset2), subset3) %>%
  dplyr::filter(ID %in% GOlist)

descOrder <- sort(unique(data$Description))[c(1, 2, 5, 7,
                                 3, 4, 6, 8, 9, 10, 11)]

p <- ggplot(data, aes(x = group, y = factor(Description, level = descOrder), color = p.adjust, size = gr)) + 
  geom_point() + theme_bw() +
  scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
  scale_size_continuous(range = c(0, 3)) +
  labs(x = NULL, y = NULL) +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- here(figDir, "..", "GO", "GO_groups")
height = 2
width = 3.5
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
- Average loop size
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)

#
temp <- temp %>% dplyr::filter(group %in% c(1, 2, 5, 8))


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
  distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv12 <- round(getPvalWilcox(temp, 1, 2), 5)
pv15 <- round(getPvalWilcox(temp, 1, 5), 5)
pv18 <- round(getPvalWilcox(temp, 1, 8), 5)
pv25 <- round(getPvalWilcox(temp, 2, 5), 5)
pv28 <- round(getPvalWilcox(temp, 2, 8), 5)
pv58 <- round(getPvalWilcox(temp, 5, 8), 5)


p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12, "\n",
                                                      "pv15: ", pv15, "\n",
                                                      "pv18: ", pv18, "\n",
                                                      "pv25: ", pv25, "\n",
                                                      "pv28: ", pv28, "\n",
                                                      "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 3)
  
  
fileName <- paste0("size_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-N
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>%  summarize(
    peak = list(peakID),
    anno2 = list(Anno2),
    count = n())

tempSum <- tempSum %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
) %>%
  dplyr::filter(group %in% c("group1", "group2", "group5", "group8"))


# TEMP START
#geneList.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))
#geneList.group1.temp <- geneList.group1 %>% dplyr::left_join(tempSum, by = c("gene"))
# TEMP END

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$count
  distance2 <- (data %>% dplyr::filter(group ==group2) )$count
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)


p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0, 10)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1) +  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Counting loop types
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
                                 num_pp = sum(anno2 == "P-P"),
                                 num_pe = sum(anno2 == "P-E"),
                                 num_ps = sum(anno2 == "P-S"),
                                 num_px = sum(anno2 == "P-X"),
                                 ratio_reg = (num_pp + num_pe)/total,
                                 ratio_str = num_ps/total)

saveRDS(temp2, here(resultDir, "gene_loop_link.rds"))


loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
                                        num_pe = sum(num_pe),
                                        num_ps = sum(num_ps),
                                        num_px = sum(num_px))


loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")

loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))

# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()
- Loop number per gene: P-P
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1)+  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pp")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-E
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1)+  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pe")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-S
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1)+  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_ps")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-X
########
#P-X
data <- temp2 %>% dplyr::select(group, num_px)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1)+  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_px")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

A485

- Splitting genes to groups
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)


temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


fcCutoff <- 0.5
diffCutoff <- 0.2
# 
temp <- temp %>%
  dplyr::mutate(group = ifelse(mean_diff_score < -diffCutoff,
                               ifelse(shrinked_log2FC < -fcCutoff, 1,
                                      ifelse(shrinked_log2FC <fcCutoff, 2, 3)),
                               ifelse(mean_diff_score < diffCutoff,
                                      ifelse(shrinked_log2FC < -fcCutoff, 4,
                                             ifelse(shrinked_log2FC < fcCutoff, 5, 6)),
                                      ifelse(shrinked_log2FC < -fcCutoff, 7,
                                             ifelse(shrinked_log2FC < fcCutoff, 8, 9)))))


gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
gene.group3 <- (temp %>% dplyr::filter(group == 3))$gene
gene.group4 <- (temp %>% dplyr::filter(group == 4))$gene
gene.group5 <- (temp %>% dplyr::filter(group == 5))$gene
gene.group6 <- (temp %>% dplyr::filter(group == 6))$gene
gene.group7 <- (temp %>% dplyr::filter(group == 7))$gene
gene.group8 <- (temp %>% dplyr::filter(group == 8))$gene
gene.group9 <- (temp %>% dplyr::filter(group == 9))$gene

fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group2.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 3)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group3.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 4)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group4.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 5)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group5.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 6)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group6.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 7)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group7.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 8)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group8.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 9)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group9.tsv"), sep = "\t")


# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
)
- GO for each group
GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO3 <- enrichGO(gene = gene.group3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO4 <- enrichGO(gene = gene.group4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO5 <- enrichGO(gene = gene.group5, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO6 <- enrichGO(gene = gene.group6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO7 <- enrichGO(gene = gene.group7, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO8 <- enrichGO(gene = gene.group8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO9 <- enrichGO(gene = gene.group9, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")

dotplot(GO1, showCategory = 10)

# 
# GO1.df <- as.data.frame(GO1)
# GO2.df <- as.data.frame(GO2)
# GO3.df <- as.data.frame(GO5)
# GO4.df <- as.data.frame(GO8)
# 
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "group1") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group3") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# GOlist <- factor(c("GO:0033002", "GO:0070373", "GO:0048730", "GO:0031103",
#                  "GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055", 
#                  "GO:0006397", "GO:0030900", "GO:0008380"))
# 
# data <- bind_rows(bind_rows(subset1, subset2), subset3) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# descOrder <- sort(unique(data$Description))[c(1, 2, 5, 7,
#                                  3, 4, 6, 8, 9, 10, 11)]
# 
# p <- ggplot(data, aes(x = group, y = factor(Description, level = descOrder), color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups")
# height = 2
# width = 3.5
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
- Average loop size
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)


# 
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
#   distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# pv12 <- round(getPvalWilcox(temp, 1, 2), 5)
# pv15 <- round(getPvalWilcox(temp, 1, 5), 5)
# pv18 <- round(getPvalWilcox(temp, 1, 8), 5)
# pv25 <- round(getPvalWilcox(temp, 2, 5), 5)
# pv28 <- round(getPvalWilcox(temp, 2, 8), 5)
# pv58 <- round(getPvalWilcox(temp, 5, 8), 5)


p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
  # annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12, "\n",
  #                                                     "pv15: ", pv15, "\n",
  #                                                     "pv18: ", pv18, "\n",
  #                                                     "pv25: ", pv25, "\n",
  #                                                     "pv28: ", pv28, "\n",
  #                                                     "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 3)
  # 
  
fileName <- paste0("size_barplot_diffGroup_A485_vs_DMSO")
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-N
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>%  summarize(
    peak = list(peakID),
    anno2 = list(Anno2),
    count = n())

tempSum <- tempSum %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
) %>% dplyr::filter(!is.na(group))

# 
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(group ==group1) )$count
#   distance2 <- (data %>% dplyr::filter(group ==group2) )$count
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# 
# pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)


p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  coord_cartesian(ylim = c(0, 10)) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1) +  


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO")
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Counting loop types
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
                                 num_pp = sum(anno2 == "P-P"),
                                 num_pe = sum(anno2 == "P-E"),
                                 num_ps = sum(anno2 == "P-S"),
                                 num_px = sum(anno2 == "P-X"),
                                 ratio_reg = (num_pp + num_pe)/total,
                                 ratio_str = num_ps/total)

saveRDS(temp2, here(resultDir, "gene_loop_link_A485.rds"))


loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
                                        num_pe = sum(num_pe),
                                        num_ps = sum(num_ps),
                                        num_px = sum(num_px))


loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")

loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))

# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()
- Loop number per gene: P-P
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1)


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_pp")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-E
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1)


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_pe")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-S
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1)+  


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_ps")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Loop number per gene: P-X
########
#P-X
data <- temp2 %>% dplyr::select(group, num_px)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1)+  


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_px")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

dTAG

Grouping with P-N number

Grouping
name <- "chromo_cons_annoHierarchy"

alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
############
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))



# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
resultDir <- here("../../result")

temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))


pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  pnOver = ifelse(gene %in% pnOver8, "pnOver8",
                  ifelse(gene %in% pnOver6, "pnOver6",
                         ifelse(gene %in% pnOver4, "pnOver4",
                                ifelse(gene %in% pnOver2, "pnOver2",
                                       ifelse(gene %in% pnOver0, "pnOver0", NA)))))) %>%
  drop_na(pnOver)
GO for each group
GOfigDir <- here(figDir, "../GO")
getGO("pnOver8", GOfigDir, pnOver8)
getGO("pnOver6", GOfigDir, pnOver6)
getGO("pnOver4", GOfigDir, pnOver4)
getGO("pnOver2", GOfigDir, pnOver2)
getGO("pnOver0", GOfigDir, pnOver0)

#####################
GO0.df <- as.data.frame(enrichGO(gene = pnOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO2.df <- as.data.frame(enrichGO(gene = pnOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO4.df <- as.data.frame(enrichGO(gene = pnOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO6.df <- as.data.frame(enrichGO(gene = pnOver6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO8.df <- as.data.frame(enrichGO(gene = pnOver8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))


subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver0") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver2") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver4") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset6 <- GO6.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver6") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset8 <- GO8.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver8") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))


GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
                   "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
                   "GO:0072001", "GO:0007517", "GO:0048705"))

data <- bind_rows(bind_rows(bind_rows(subset0, subset2), subset4), subset8) %>%
  dplyr::filter(ID %in% GOlist)

p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
  geom_point() + theme_bw() +
  scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
  scale_size_continuous(range = c(0, 3)) +
  labs(x = NULL, y = NULL) +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- here(figDir, "..", "GO", "GO_groups_pn")
height = 2
width = 3.3
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
loop score
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv02 <- round(getPvalWilcox(temp, "pnOver0", "pnOver2"), 5)
pv24 <- round(getPvalWilcox(temp, "pnOver2", "pnOver4"), 5)
pv46 <- round(getPvalWilcox(temp, "pnOver4", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(temp, "pnOver0", "pnOver8"), 5)
pv28 <- round(getPvalWilcox(temp, "pnOver2", "pnOver8"), 5)
pv48 <- round(getPvalWilcox(temp, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(temp, "pnOver6", "pnOver8"), 5)


p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0.2)+
  annotate("text", x = 1, y = 0, label = paste0("pv02: ", convPvalue(pv02), "\n",
                                                "pv24: ", convPvalue(pv24), "\n",
                                                "pv46: ", convPvalue(pv46), "\n",
                                                "pv68: ", convPvalue(pv68), "\n",
                                                "pv48: ", convPvalue(pv48), "\n",
                                                "pv28: ", convPvalue(pv28), "\n",
                                                "pv08: ", convPvalue(pv08), "\n"), 
           color = "black", hjust = 0, size = 3)

  
fileName <- paste0("diffScore_barplot_pnGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
#
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$log2FoldChange
  distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$log2FoldChange
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
 
pv02 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver2"), 5)
pv04 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver4"), 5)
pv06 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver8"), 5)
pv24 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver4"), 5)
pv26 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver6"), 5)
pv28 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver8"), 5)
pv46 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver6"), 5)
pv48 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(tempDown, "pnOver6", "pnOver8"), 5)




p <- ggplot(tempDown, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)+
  annotate("text", x = 1, y = -0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
                                                   "pv04: ", convPvalue(pv04), "\n",
                                                   "pv06: ", convPvalue(pv06), "\n",
                                                   "pv08: ", convPvalue(pv08), "\n",
                                                   "pv24: ", convPvalue(pv24), "\n",
                                                   "pv26: ", convPvalue(pv26), "\n",
                                                   "pv28: ", convPvalue(pv28), "\n",
                                                   "pv46: ", convPvalue(pv46), "\n",
                                                   "pv48: ", convPvalue(pv48), "\n",
                                                   "pv68: ", convPvalue(pv68), "\n"), 
           color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_pnGroup_dTAG_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
pv02 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver2"), 5)
pv04 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver4"), 5)
pv06 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver8"), 5)
pv24 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver4"), 5)
pv26 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver6"), 5)
pv28 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver8"), 5)
pv46 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver6"), 5)
pv48 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(tempUp, "pnOver6", "pnOver8"), 5)




p <- ggplot(tempUp, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)+
  annotate("text", x = 1, y = 0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
                                                   "pv04: ", convPvalue(pv04), "\n",
                                                   "pv06: ", convPvalue(pv06), "\n",
                                                   "pv08: ", convPvalue(pv08), "\n",
                                                   "pv24: ", convPvalue(pv24), "\n",
                                                   "pv26: ", convPvalue(pv26), "\n",
                                                   "pv28: ", convPvalue(pv28), "\n",
                                                   "pv46: ", convPvalue(pv46), "\n",
                                                   "pv48: ", convPvalue(pv48), "\n",
                                                   "pv68: ", convPvalue(pv68), "\n"), 
           color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_pnGroup_dTAG_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

Grouping with P-S number

Grouping
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  psOver = ifelse(gene %in% psOver4, "psOver4",
                  ifelse(gene %in% psOver3, "psOver3",
                         ifelse(gene %in% psOver2, "psOver2",
                                ifelse(gene %in% psOver1, "psOver1",
                                       ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
  drop_na(psOver)
GO for each group
GOfigDir <- here(figDir, "../GO")

#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))

fwrite(GO0.df, here("GO_PS_group0.tsv"), sep = "\t")
fwrite(GO1.df, here("GO_PS_group1.tsv"), sep = "\t")
fwrite(GO2.df, here("GO_PS_group2.tsv"), sep = "\t")
fwrite(GO3.df, here("GO_PS_group3.tsv"), sep = "\t")
fwrite(GO4.df, here("GO_PS_group4.tsv"), sep = "\t")

GO0.df <- fread(here("GO_PS_group0.tsv"))
GO1.df <- fread(here("GO_PS_group1.tsv"))
GO2.df <- fread(here("GO_PS_group2.tsv"))
GO3.df <- fread(here("GO_PS_group3.tsv"))
GO4.df <- fread(here("GO_PS_group4.tsv"))

subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
  dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
  dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
  dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
  dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
  dplyr::arrange(p.adjust)

subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))


GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
                   "GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664"))

data <- bind_rows(bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset3), subset4) %>%
  dplyr::filter(ID %in% GOlist)

descOrder <- sort(unique(data$Description))[rev(c(10, 4, 5, 9, 11,
                                                  2, 6,
                                                  3, 1, 8, 7))]

data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) + 
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM) + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      # limits = c(0, 1),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL) +
  theme_bw() +  # Apply theme_bw first, so custom theme settings come after
  theme(
    panel.background = element_rect(fill = "transparent"),  # Override theme_bw panel
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      size = fontSizeS,         # Ensure size is set for x-axis text
      family = fontType,
      color = "#000000",
    ),
    axis.text.y = element_text(
      size = fontSizeS,         # Ensure size is set for y-axis text
      family = fontType,
      color = "#000000",
      lineheight = 0.9          # Allows wrapping for y-axis labels to fit into 2 lines
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "..", "GO", "GO_groups_ps")
width <- panelSize(2.7)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
loop score
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)


p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + 
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineMedium * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) +
    scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
  coord_cartesian(ylim = c(-0.5, 0.1))



fileName <- paste0("diffScore_barplot_psGroup_dTAG_vs_DMSO_2")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))

ks_result1 <- ks.test(
  temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
  temp %>% dplyr::filter(psOver == "psOver3") %>% pull(absLog2FC)
)
ks_result2 <- ks.test(
  temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
  temp %>% dplyr::filter(psOver == "psOver2") %>% pull(absLog2FC)
)
ks_result3 <- ks.test(
  temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
  temp %>% dplyr::filter(psOver == "psOver1") %>% pull(absLog2FC)
)
ks_result4 <- ks.test(
  temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
  temp %>% dplyr::filter(psOver == "psOver0") %>% pull(absLog2FC)
)

p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_psGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()




#
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
 
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)

ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
  
fileName <- paste0("log2FC_barplot_psGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)




p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_psGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

Grouping with P-E number

Grouping
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

psOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  psOver = ifelse(gene %in% psOver4, "psOver4",
                  ifelse(gene %in% psOver3, "psOver3",
                         ifelse(gene %in% psOver2, "psOver2",
                                ifelse(gene %in% psOver1, "psOver1",
                                       ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
  drop_na(psOver)
GO for each group
GOfigDir <- here(figDir, "../GO")
#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))

# fwrite(GO0.df, here("GO_PE_group0.tsv"), sep = "\t")
# fwrite(GO1.df, here("GO_PE_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_PE_group2.tsv"), sep = "\t")
# fwrite(GO3.df, here("GO_PE_group3.tsv"), sep = "\t")
# fwrite(GO4.df, here("GO_PE_group4.tsv"), sep = "\t")


subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
  dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
  dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
  dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
  dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
  dplyr::arrange(p.adjust)

subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))

GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
                   "GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664",
                   "GO:0040029"))

data <- bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset4) %>%
  dplyr::filter(ID %in% GOlist)

descOrder <- sort(unique(data$Description))[rev(c(11, 5, 6, 10, 12,
                                                  1,
                                                  3, 7, 4, 2, 9, 8))]

empty_row3 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 3"                   # Group to add as empty column
)

empty_row4 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 4"                   # Group to add as empty column
)

# Append the empty row to your dataset
data <- rbind(data, empty_row3, empty_row4)


data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) + 
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM) + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      # limits = c(0, 1),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL) +
  theme_bw() +  # Apply theme_bw first, so custom theme settings come after
  theme(
    panel.background = element_rect(fill = "transparent"),  # Override theme_bw panel
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      size = fontSizeS,         # Ensure size is set for x-axis text
      family = fontType,
      color = "#000000",
    ),
    axis.text.y = element_text(
      size = fontSizeS,         # Ensure size is set for y-axis text
      family = fontType,
      color = "#000000",
      lineheight = 0.9          # Allows wrapping for y-axis labels to fit into 2 lines
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "..", "GO", "GO_groups_pe")
width <- panelSize(2.65)*mmToInch
height <- panelSize(1.3)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
loop score
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)


p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + 
    scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineMedium * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) +
  coord_cartesian(ylim = c(-0.5, 0.1))


fileName <- paste0("diffScore_barplot_peGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_peGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


#
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
 
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)

ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
  
fileName <- paste0("log2FC_barplot_peGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)




p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_peGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

Grouping with P-P number

Grouping
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

psOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  psOver = ifelse(gene %in% psOver4, "psOver4",
                  ifelse(gene %in% psOver3, "psOver3",
                         ifelse(gene %in% psOver2, "psOver2",
                                ifelse(gene %in% psOver1, "psOver1",
                                       ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
  drop_na(psOver)
GO for each group
GOfigDir <- here(figDir, "../GO")

#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
# 
# fwrite(GO0.df, here("GO_PP_group0.tsv"), sep = "\t")
# fwrite(GO1.df, here("GO_PP_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_PP_group2.tsv"), sep = "\t")
# fwrite(GO3.df, here("GO_PP_group3.tsv"), sep = "\t")
# fwrite(GO4.df, here("GO_PP_group4.tsv"), sep = "\t")

subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
  dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
  dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
  dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
  dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
  dplyr::arrange(p.adjust)

subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))

GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
                   "GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664"))

data <- bind_rows(subset0, subset1) %>%
  dplyr::filter(ID %in% GOlist)



empty_row2 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 2"                   # Group to add as empty column
)

empty_row3 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 3"                   # Group to add as empty column
)

empty_row4 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 4"                   # Group to add as empty column
)

# Append the empty row to your dataset
data <- rbind(data, empty_row2, empty_row3, empty_row4)
descOrder <- sort(unique(data$Description))[rev(c(12,
                                                  4, 5, 9, 11, 10, 
                                                  2, 6, 3, 1, 8, 7))]
data <- data %>% dplyr::filter(Description %in% descOrder)


data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) + 
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM) + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      # limits = c(0, 1),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL) +
  theme_bw() +  # Apply theme_bw first, so custom theme settings come after
  theme(
    panel.background = element_rect(fill = "transparent"),  # Override theme_bw panel
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      size = fontSizeS,         # Ensure size is set for x-axis text
      family = fontType,
      color = "#000000",
    ),
    axis.text.y = element_text(
      size = fontSizeS,         # Ensure size is set for y-axis text
      family = fontType,
      color = "#000000",
      lineheight = 0.9          # Allows wrapping for y-axis labels to fit into 2 lines
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "..", "GO", "GO_groups_pp")
width <- panelSize(2.65)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
loop score
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)

p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + 
    scale_fill_manual(values = rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineMedium * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) +
  coord_cartesian(ylim = c(-0.5, 0.1))


fileName <- paste0("diffScore_barplot_ppGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_ppGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



#
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
 
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)

ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
  
fileName <- paste0("log2FC_barplot_ppGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)




p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_ppGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

A485

Grouping with P-N number

Grouping
############
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))


# temp2
#calculating diff score and log2fc distribution based on p-n numbers
temp2 <- readRDS(here(resultDir, "gene_loop_link_A485.rds"))

pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  pnOver = ifelse(gene %in% pnOver8, "pnOver8",
                  ifelse(gene %in% pnOver6, "pnOver6",
                         ifelse(gene %in% pnOver4, "pnOver4",
                                ifelse(gene %in% pnOver2, "pnOver2",
                                       ifelse(gene %in% pnOver0, "pnOver0", NA)))))) %>%
  drop_na(pnOver)
(SKIP) GO for each group
# GOfigDir <- here(figDir, "../GO")
# getGO("pnOver8", GOfigDir, pnOver8)
# getGO("pnOver6", GOfigDir, pnOver6)
# getGO("pnOver4", GOfigDir, pnOver4)
# getGO("pnOver2", GOfigDir, pnOver2)
# getGO("pnOver0", GOfigDir, pnOver0)
# 
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = pnOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = pnOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = pnOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO6.df <- as.data.frame(enrichGO(gene = pnOver6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO8.df <- as.data.frame(enrichGO(gene = pnOver8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# 
# 
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver0") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver4") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset6 <- GO6.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver6") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset8 <- GO8.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver8") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# 
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
#                    "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
#                    "GO:0072001", "GO:0007517", "GO:0048705"))
# 
# data <- bind_rows(bind_rows(bind_rows(subset0, subset2), subset4), subset8) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups_pn")
# height = 2
# width = 3.3
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
loop score
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
#   distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# pv02 <- round(getPvalWilcox(temp, "pnOver0", "pnOver2"), 5)
# pv24 <- round(getPvalWilcox(temp, "pnOver2", "pnOver4"), 5)
# pv46 <- round(getPvalWilcox(temp, "pnOver4", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(temp, "pnOver0", "pnOver8"), 5)
# pv28 <- round(getPvalWilcox(temp, "pnOver2", "pnOver8"), 5)
# pv48 <- round(getPvalWilcox(temp, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(temp, "pnOver6", "pnOver8"), 5)


p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0, label = paste0("pv02: ", convPvalue(pv02), "\n",
  #                                               "pv24: ", convPvalue(pv24), "\n",
  #                                               "pv46: ", convPvalue(pv46), "\n",
  #                                               "pv68: ", convPvalue(pv68), "\n",
  #                                               "pv48: ", convPvalue(pv48), "\n",
  #                                               "pv28: ", convPvalue(pv28), "\n",
  #                                               "pv08: ", convPvalue(pv08), "\n"), 
  #          color = "black", hjust = 0, size = 3)

   
fileName <- paste0("diffScore_barplot_pnGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
#
#  getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$log2FoldChange
#   distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$log2FoldChange
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#  
# pv02 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver2"), 5)
# pv04 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver4"), 5)
# pv06 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver8"), 5)
# pv24 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver4"), 5)
# pv26 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver6"), 5)
# pv28 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver8"), 5)
# pv46 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver6"), 5)
# pv48 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(tempDown, "pnOver6", "pnOver8"), 5)




p <- ggplot(tempDown, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = -0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
  #                                                  "pv04: ", convPvalue(pv04), "\n",
  #                                                  "pv06: ", convPvalue(pv06), "\n",
  #                                                  "pv08: ", convPvalue(pv08), "\n",
  #                                                  "pv24: ", convPvalue(pv24), "\n",
  #                                                  "pv26: ", convPvalue(pv26), "\n",
  #                                                  "pv28: ", convPvalue(pv28), "\n",
  #                                                  "pv46: ", convPvalue(pv46), "\n",
  #                                                  "pv48: ", convPvalue(pv48), "\n",
  #                                                  "pv68: ", convPvalue(pv68), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_pnGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#  
# pv02 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver2"), 5)
# pv04 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver4"), 5)
# pv06 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver8"), 5)
# pv24 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver4"), 5)
# pv26 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver6"), 5)
# pv28 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver8"), 5)
# pv46 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver6"), 5)
# pv48 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(tempUp, "pnOver6", "pnOver8"), 5)
# 



p <- ggplot(tempUp, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
  #                                                  "pv04: ", convPvalue(pv04), "\n",
  #                                                  "pv06: ", convPvalue(pv06), "\n",
  #                                                  "pv08: ", convPvalue(pv08), "\n",
  #                                                  "pv24: ", convPvalue(pv24), "\n",
  #                                                  "pv26: ", convPvalue(pv26), "\n",
  #                                                  "pv28: ", convPvalue(pv28), "\n",
  #                                                  "pv46: ", convPvalue(pv46), "\n",
  #                                                  "pv48: ", convPvalue(pv48), "\n",
  #                                                  "pv68: ", convPvalue(pv68), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_pnGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

Grouping with P-S number

Grouping
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene




## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


temp <- temp %>% dplyr::mutate(
  psOver = ifelse(gene %in% psOver4, "psOver4",
                  ifelse(gene %in% psOver3, "psOver3",
                         ifelse(gene %in% psOver2, "psOver2",
                                ifelse(gene %in% psOver1, "psOver1",
                                       ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
  drop_na(psOver)
(SKIP) GO for each group
# GOfigDir <- here(figDir, "../GO")
# getGO("psOver4", GOfigDir, psOver4)
# getGO("psOver3", GOfigDir, psOver3)
# getGO("psOver2", GOfigDir, psOver2)
# getGO("psOver1", GOfigDir, psOver1)
# getGO("psOver0", GOfigDir, psOver0)
# 
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# 
# 
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# 
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
#                    "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
#                    "GO:0072001", "GO:0007517", "GO:0048705", "GO:0003002"))
# 
# data <- bind_rows(bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset3), subset4) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups_ps")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
loop score
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
# 

p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n", 
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3)

  
fileName <- paste0("diffScore_barplot_psGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
#
#  getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#  
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
# 
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_psGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#  
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
# 



p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_psGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

Grouping with P-E number

Grouping
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


temp <- temp %>% dplyr::mutate(
  peOver = ifelse(gene %in% peOver4, "peOver4",
                  ifelse(gene %in% peOver3, "peOver3",
                         ifelse(gene %in% peOver2, "peOver2",
                                ifelse(gene %in% peOver1, "peOver1",
                                       ifelse(gene %in% peOver0, "peOver0", NA)))))) %>%
  drop_na(peOver)
(SKIP) GO for each group
# GOfigDir <- here(figDir, "../GO")
# getGO("peOver4", GOfigDir, psOver4)
# getGO("peOver3", GOfigDir, psOver3)
# getGO("peOver2", GOfigDir, psOver2)
# getGO("peOver1", GOfigDir, psOver1)
# getGO("peOver0", GOfigDir, psOver0)
# 
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# 
# 
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# 
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
#                    "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
#                    "GO:0072001", "GO:0007517", "GO:0048705", 
#                    "GO:0040029", "GO:0010165"))
# 
# data <- bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset4) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups_pe")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
loop score
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)


p <- ggplot(temp, aes(x = peOver, y = mean_diff_score)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ",convPvalue( ps14), "\n",
  #                                               "ps04: ",convPvalue( ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3)

  
fileName <- paste0("diffScore_barplot_peGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
#
#  getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#  
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = peOver, y = log2FoldChange)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_peGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
# 




p <- ggplot(tempUp, aes(x = peOver, y = log2FoldChange)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_peGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

Grouping with P-P number

Grouping
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


temp <- temp %>% dplyr::mutate(
  ppOver = ifelse(gene %in% ppOver4, "ppOver4",
                  ifelse(gene %in% ppOver3, "ppOver3",
                         ifelse(gene %in% ppOver2, "ppOver2",
                                ifelse(gene %in% ppOver1, "ppOver1",
                                       ifelse(gene %in% ppOver0, "ppOver0", NA)))))) %>%
  drop_na(ppOver)
(SKIP) GO for each group
# GOfigDir <- here(figDir, "../GO")
# getGO("ppOver4", GOfigDir, psOver4)
# getGO("ppOver3", GOfigDir, psOver3)
# getGO("ppOver2", GOfigDir, psOver2)
# getGO("ppOver1", GOfigDir, psOver1)
# getGO("ppOver0", GOfigDir, psOver0)
# 
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# 
# 
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# 
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
#                    "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
#                    "GO:0072001", "GO:0007517", "GO:0048705", "GO:0003002", "GO:0009411"))
# 
# data <- bind_rows(bind_rows(subset0, subset1), subset3) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups_pp")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
loop score
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)


p <- ggplot(temp, aes(x = ppOver, y = mean_diff_score)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3)

  
fileName <- paste0("diffScore_barplot_ppGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
#
#  getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#  
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)



p <- ggplot(tempDown, aes(x = ppOver, y = log2FoldChange)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_ppGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#  
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
# 
# 



p <- ggplot(tempUp, aes(x = ppOver, y = log2FoldChange)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_ppGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

[2.13] How to define multiconnected hub using regulatory loops?

# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                 diffCutoff = 0.2,
                                 annoList = c("P-P", "P-E", "P-S", "P-X")) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27, 28))

data <- fread(here(consensusDir, paste0(name, ".tsv"))) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27)) %>%
  dplyr::mutate(binSize = end1 - start1,
                adj_start1 = ifelse(binSize == 5000, start1,
                                    ifelse(binSize == 10000, start1,
                                           start1 + 10000)),
                adj_end1 = adj_start1 + 5000,
                adj_start2 = ifelse(binSize == 5000, start2,
                                    ifelse(binSize == 10000, start2,
                                           start2 + 10000)),
                adj_end2 = adj_start2 + 5000,
                anchor1 = paste(chrom1, adj_start1, adj_end1, sep = "_"),
                anchor2 = paste(chrom2, adj_start2, adj_end2, sep = "_"))

# data <- fread(here(consensusDir, paste0(name, ".tsv"))) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27)) %>% 
#   dplyr::mutate(binSize = end1 - start1,
#                 adj_start1 = ifelse(binSize == 5000, start1,
#                                     ifelse(binSize == 10000, start1+5000,
#                                            start1 + 10000)),
#                 adj_end1 = adj_start1 + 5000,
#                 adj_start2 = ifelse(binSize == 5000, start2,
#                                     ifelse(binSize == 10000, start2 + 5000,
#                                            start2 + 10000)),
#                 adj_end2 = adj_start2 + 5000,
#                 anchor1 = paste(chrom1, adj_start1, adj_end1, sep = "_"),
#                 anchor2 = paste(chrom2, adj_start2, adj_end2, sep = "_"))

data.reg <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"))

## Graph
#install.packages("igraph")
library(igraph)

# To be able to construct a graph, each anchor need to have same resolution.
# Shink the size to 10 kb
# If the anchor is 25kb, choose the middle 5kb bin
# If the anchor is 10kb, choose the left 5kb bin

graph <- graph_from_data_frame(data.reg %>% dplyr::select(anchor1, anchor2))
components <- components(graph)
data.reg$hub <- components$membership[data.reg$anchor1]

temp.bedpe <- data.reg %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj.bedpe")), 
       col.names = FALSE,row.names = FALSE, sep = "\t")  

hubNum <- data.reg %>% group_by(hub) %>% summarise(count = n())

# Hub size distribution
p <- ggplot(hubNum, aes(x = count)) + geom_histogram(binwidth  = 1) + theme_classic() + 
  scale_y_log10()

svglite(here(figDir, "hub_size_distribution.svg"), height = 2, width = 2)
print(p)
dev.off()

data.reg <- data.reg %>% dplyr::left_join(hubNum, by = "hub") 
data.reg <- data.reg %>% dplyr::mutate(peakID = paste(chrom1, start1, start2, sep = "_"))

# Exporting regulatory loops with specific hub size
temp.bedpe <- data.reg %>% dplyr::filter(count >= 3) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub3.bedpe")), 
       col.names = FALSE,row.names = FALSE, sep = "\t")  

temp.bedpe <- data.reg %>% dplyr::filter(count >= 5) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub5.bedpe")), 
       col.names = FALSE,row.names = FALSE, sep = "\t")

temp.bedpe <- data.reg %>% dplyr::filter(count >= 10) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub10.bedpe")), 
       col.names = FALSE,row.names = FALSE, sep = "\t")


# Adding gene annotation to hubs
genePeakPair <- geneAnnoData %>% dplyr::mutate(peakID = paste(chrom1, start1, start2, sep = "_")) %>% dplyr::select(peakID, gene)
data.reg <- data.reg %>% dplyr::left_join(genePeakPair, by = "peakID")

# During unnesting, E-N loops without gene annotated are removed
data.reg <- data.reg %>% unnest(gene) %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
)


temp <- data.reg %>% dplyr::select(gene, hub, count, group) %>%
  group_by(gene) %>% slice_max(count, with_ties = FALSE) %>%
  dplyr::filter(group %in% c("group1", "group2", "group5", "group8"))


# TEMP START
# TEMP START
geneList.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))
geneList.group1.temp <- geneList.group1 %>% dplyr::left_join(temp, by = c("gene"))
# TEMP END

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$count
  distance2 <- (data %>% dplyr::filter(group ==group2) )$count
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)


p <- ggplot(temp, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() + 
    scale_y_continuous(breaks = seq(0, 30, by = 5), limits = c(0, 30)) + 

  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1) +  theme(legend.position = "none")


fileName <- paste0("countHub_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

[2.14] Looking deeper into TAD and insulation

Insulation score of TSS?

Importing insulation score

# Import insulation score calculated with python
resultDir <- here("../../result")
insScore.DMSO <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1DMSO.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.DMSO) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.dTAG <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1dTAG.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.dTAG) <- c("chrom", "start", "end", "insulationScore", "binID")

#insScore <- full_join(insScore.DMSO, insScore.dTAG, by = c("binID")) %>%
#  dplyr::select(binID, chrom.x, start.x, end.x, insulationScore.x, insulationScore.y) 

#colnames(insScore) <- c("binID", "chr", "start", "end", "insulation_score_DMSO", "insulation_scoare_dTAG")

#View(insScore)

Linking to gene

gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")

getInsulationScore <- function(chr, TSS, insScore.tb){
  temp.tb <- insScore.tb %>% dplyr::filter(chrom == chr, start < TSS, end > TSS)
  out <- temp.tb$insulationScore
  if(length(out) < 1){
    return(NA)
  }else{
    return(temp.tb$insulationScore)
  }
}

gene.insScore.all <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  log2_insScore_DMSO = getInsulationScore(chr, TSS, insScore.DMSO),
  insScore_DMSO = 2^log2_insScore_DMSO,
  log2_insScore_dTAG = getInsulationScore(chr, TSS, insScore.dTAG),
  insScore_dTAG = 2^log2_insScore_dTAG,
  diff_insScore = insScore_dTAG - insScore_DMSO)

Checking insulation score of the nearest TAD

# Importing TAD boundaries
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               center = (start + end)/2)

findClosestTADBoundary <- function(chrom, TSS, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom) %>%
    dplyr::mutate(distance = abs(center - TSS)) %>%
    slice_min(distance)
  if(nrow(temp) == 1){
      return(temp$center)
  }else{
    return(NA)
  }
}


gene.insScore.all <- gene.insScore.all %>% rowwise() %>%
  dplyr::mutate(closestBoundary = findClosestTADBoundary(chr, TSS, tad_boundary),
                log2_boundaryInsScore_DMSO = getInsulationScore(chr, closestBoundary, insScore.DMSO),
                boundaryInsScore_DMSO = 2^log2_boundaryInsScore_DMSO,
                log2_boundaryInsScore_dTAG = getInsulationScore(chr, closestBoundary, insScore.dTAG),
                boundaryInsScore_dTAG = 2^log2_boundaryInsScore_dTAG,
                diff_boundaryInsScore = boundaryInsScore_dTAG - boundaryInsScore_DMSO)

saveRDS(gene.insScore.all, here(resultDir, "gene.insScore.all.rds"))
gene.insScore.all <- readRDS(here(resultDir, "gene.insScore.all.rds"))
Plotting for binary groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2", NA))) %>% 
  dplyr::filter(!is.na(group))


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_insScore <- function(temp.tb, note, ymin = -1.5, ymax = 0){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = ".", fill = group, y = -score)) + 
    labs(x = NULL, y = "- Insulation score") +  # Remove x-axis title
    scale_fill_manual(values = rev((c("#777777", "#F28E2C")))) +

    introdataviz::geom_split_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square",
                                    alpha = .4) +
    geom_boxplot(width = 0.3, color = "black",
                 linewidth = lineMedium * mmToLineUnit, lineend = "square",
                 outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + theme_classic() +
    stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) +
    
    # 
    # geom_violin(aes(fill = group), color = "black", linewidth = lineThick*mmToLineUnit, lineend = "square", show.legend = FALSE) + 
    # geom_boxplot(width = 0.1,  color = "black", linewidth = lineThick*mmToLineUnit, lineend = "square",
    #              outlier.size = 1, outlier.stroke = NA) +
    # stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 1, fill = "red", color = "black") + 
    geom_hline(yintercept = 0, linewidth = lineThick*mmToLineUnit) +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12)),
             color = "black", hjust = 0, size = 3) +
    theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )+   guides(
      fill = guide_legend(
        keywidth = 0.2,  # Adjust the width of the legend keys
        keyheight = 0.2  # Adjust the height of the legend keys
      ))
      
      
  fileName <- paste0("insulation_score_binaryGroup_", note)
  width <- panelSize(1.55)*mmToInch
height <- panelSize(1.2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_binaryGroup", ymin = -1.1, ymax = -0.5)

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_binaryGroup", ymin =  -1.1, ymax = -0.5)

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_binaryGroup", ymin = -0.3, ymax = 0.1)
Plotting for groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff", ymin = -0.5, ymax = 1)
Plotting for P-N
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% pnOver8, "p8",
                 ifelse(ensembl %in% pnOver6, "p6",
                        ifelse(ensembl %in% pnOver4, "p4",
                               ifelse(ensembl %in% pnOver2, "p2", 
                                      ifelse(ensembl %in% pnOver0, "p0", NA))))))  %>% 
  dplyr::filter(!is.na(group))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p02 <- round(getPvalWilcox(temp.tb, "p0", "p2"), 5)
  p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
  p46 <- round(getPvalWilcox(temp.tb, "p4", "p6"), 5)
  p68 <- round(getPvalWilcox(temp.tb, "p6", "p8"), 5)
  p48 <- round(getPvalWilcox(temp.tb, "p4", "p8"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "p2", "p8"), 5)
  p08 <- round(getPvalWilcox(temp.tb, "p0", "p8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p02: ", convPvalue(p02), "\n",
                                                         "p24: ", convPvalue(p24), "\n",
                                                         "p46: ", convPvalue(p46), "\n",
                                                         "p68: ", convPvalue(p68), "\n",
                                                         "p48: ", convPvalue(p48), "\n",
                                                         "p28: ",convPvalue( p28), "\n",
                                                         "p08: ",convPvalue( p08), "\n"),
             color = "black", hjust = 0, size = 3)  +
    scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-N")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-N")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-N", ymin = -0.5, ymax = 1)
Plotting for P-S
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% psOver4, "p4",
                 ifelse(ensembl %in% psOver3, "p3",
                        ifelse(ensembl %in% psOver2, "p2",
                               ifelse(ensembl %in% psOver1, "p1", 
                                      ifelse(ensembl %in% psOver0, "p0", NA))))))  %>% 
  dplyr::filter(!is.na(group))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
  p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
  p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
  p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
  p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
  p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
  p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
                                                         "p12: ", convPvalue(p12), "\n",
                                                         "p23: ", convPvalue(p23), "\n",
                                                         "p34: ", convPvalue(p34), "\n",
                                                         "p24: ", convPvalue(p24), "\n",
                                                         "p14: ",convPvalue( p14), "\n",
                                                         "p04: ",convPvalue( p04), "\n"), 
             color = "black", hjust = 0, size = 3) +
    scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-S")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-S")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-S", ymin = -0.5, ymax = 1)
Plotting for P-E
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% peOver4, "p4",
                 ifelse(ensembl %in% peOver3, "p3",
                        ifelse(ensembl %in% peOver2, "p2",
                               ifelse(ensembl %in% peOver1, "p1", 
                                      ifelse(ensembl %in% peOver0, "p0", NA))))))  %>% 
  dplyr::filter(!is.na(group))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
  p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
  p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
  p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
  p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
  p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
  p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
                                                         "p12: ", convPvalue(p12), "\n",
                                                         "p23: ", convPvalue(p23), "\n",
                                                         "p34: ", convPvalue(p34), "\n",
                                                         "p24: ", convPvalue(p24), "\n",
                                                         "p14: ",convPvalue( p14), "\n",
                                                         "p04: ",convPvalue( p04), "\n"), 
             color = "black", hjust = 0, size = 3) +
    scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-E")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-E")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-E", ymin = -0.5, ymax = 1)
Plotting for P-P
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% ppOver4, "p4",
                 ifelse(ensembl %in% ppOver3, "p3",
                        ifelse(ensembl %in% ppOver2, "p2",
                               ifelse(ensembl %in% ppOver1, "p1", 
                                      ifelse(ensembl %in% ppOver0, "p0", NA))))))  %>% 
  dplyr::filter(!is.na(group))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
  p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
  p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
  p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
  p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
  p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
  p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
                                                         "p12: ", convPvalue(p12), "\n",
                                                         "p23: ", convPvalue(p23), "\n",
                                                         "p34: ", convPvalue(p34), "\n",
                                                         "p24: ", convPvalue(p24), "\n",
                                                         "p14: ",convPvalue( p14), "\n",
                                                         "p04: ",convPvalue( p04), "\n"), 
             color = "black", hjust = 0, size = 3) +
    scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-P")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-P")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-P", ymin = -0.5, ymax = 1)

[2.15] TAD distance/size

[2.16] Checking the intensity of RAD21 at TAD boundary

Calculating RAD21 intensity of the nearest TAD boundary

refDir <- here("../..", "reference")

# Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               start = start -175000/2,
                                               end = end + 175000/2,
                                               center = (start + end)/2)

# Importing RAD21 bigwig track
bw.RAD21 <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm_bin50bp.bw"))

# Create GRanges object for TAD boundaries
tad_ranges <- makeGRangesFromDataFrame(tad_boundary, keep.extra.columns = TRUE)

# Find overlaps between all TAD boundaries and RAD21 bigwig data
overlaps <- findOverlaps(tad_ranges, bw.RAD21)

# Extract overlapping regions and scores from the BigWig
overlapping_bw <- bw.RAD21[subjectHits(overlaps)]
overlapping_tads <- tad_ranges[queryHits(overlaps)]

# Aggregate scores by TAD boundary regions


scores <- as_tibble(overlapping_bw) %>%
  dplyr::mutate(tad_id = tad_ranges[queryHits(overlaps)]$tad_id) %>%
  group_by(tad_id) %>%
  summarise(rad21Score = sum(score))

tad_boundary <- tad_boundary %>%
  left_join(scores, by = "tad_id")





# Importing gene
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")


# Find nearest TAD boundary
findClosestTADBoundaryID <- function(chrom, TSS, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom) %>%
    dplyr::mutate(distance = abs(center - TSS)) %>%
    slice_min(distance)
  if(nrow(temp) == 1){
      return(temp$tad_id)
  }else{
    return(NA)
  }
}
gene.TSS.tb <- gene.TSS.tb %>% rowwise() %>%
  dplyr::mutate(closestBoundary = findClosestTADBoundaryID(chr, TSS, tad_boundary))

temp <- tad_boundary %>% dplyr::select(tad_id, rad21Score)

gene.TSS.tb <- gene.TSS.tb %>% left_join(temp, by = c("closestBoundary" = "tad_id") )
Plotting for groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))


## Plot distance
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$rad21Score
  distance2 <- (data %>% dplyr::filter(group == group2) )$rad21Score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_rad21ScoreAtBoundary <- function(temp.tb, note){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = rad21Score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + 
    annotate("text", x = 1, y = 20 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("rad21Score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

plot_rad21ScoreAtBoundary(gene.TSS.tb.plot, "group")

[2.17] Checking the density of gene / enhancer within designated TAD

Making TAD gene count

### IMPORTING REQUIRED DATA
# Importing TAD
tad <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_G1DMSO.bedpe"))
colnames(tad) <- c("chr1", "start1", "end1", "chr2", "start2", "end2")
tad <- tad %>% dplyr::mutate(tadID = paste(chr1, start1, end1, sep = "_"))


# Importing gene
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")

# Importing enhancer
peak.H3K27ac <- as_tibble(importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))) %>%
  dplyr::mutate(center = (start + end)/2)


### FUNCTION FOR COUNTING
findItsTAD <- function(chrom, coordinate, tad){
  temp <- tad %>% dplyr::filter(chr1 == chrom,
                                         start1 < coordinate,
                                         end1 > coordinate)
  if(nrow(temp) == 1){
    return(temp$tadID)
  }else{
    return(NA)
  }
}

### Counting each features for TAD
### Counting gene
geneCountPerTAD <- gene.TSS.tb %>% rowwise() %>%
  dplyr::mutate(TAD = findItsTAD(chr, TSS, tad)) %>%
  drop_na() %>%
  group_by(TAD) %>%
  summarize(count = n())

### Counting enhancer

enhCountPerTAD <- peak.H3K27ac %>% rowwise() %>%
  dplyr::mutate(TAD = findItsTAD(seqnames, center, tad)) %>%
  drop_na() %>%
  group_by(TAD) %>%
  summarize(count = n())

### Get TAD sizez
tad.db <- tad %>% left_join(geneCountPerTAD, by = c("tadID" = "TAD"), ) %>%
  left_join(enhCountPerTAD, by = c("tadID" = "TAD")) %>%
  dplyr::select(c(1, 2, 3, 7, 8, 9)) %>%
  mutate_all(~replace(., is.na(.), 0))

colnames(tad.db) <- c("chr", "start", "end", "tadID", "geneCount", "enhCount")

tad.db <- tad.db %>% dplyr::mutate(
  tadSize = end - start,
  geneDensity = geneCount/tadSize * 100e3,
  enhDensity = enhCount/tadSize* 100e3,
  regDensity = (geneCount + enhCount)/tadSize* 100e3
)



### Assign TAD and information to gene
gene.TSS.tb <- gene.TSS.tb %>% rowwise() %>%
  dplyr::mutate(TAD = findItsTAD(chr, TSS, tad))
#gene.TSS.tb <- gene.TSS.tb %>% dplyr::filter(!is.na(TAD))
temp <- tad.db %>% dplyr::select(-c(1, 2, 3))

gene.TSS.tb <- gene.TSS.tb %>% dplyr::left_join(temp, by = c("TAD" = "tadID"))

Plotting for groups

geneCount
Binary group
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2", NA))) %>% 
  dplyr::filter(!is.na(group))


## geneCount
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$geneCount
  distance2 <- (data %>% dplyr::filter(group == group2) )$geneCount
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_geneCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = geneCount)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("geneCount_", note)
  height <- 3
  width <- 1.5
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_geneCount(gene.TSS.tb.plot, "group_binaryGroup", ymax = 200)
Group
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))


## geneCount
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$geneCount
  distance2 <- (data %>% dplyr::filter(group == group2) )$geneCount
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_geneCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = geneCount)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("geneCount_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_geneCount(gene.TSS.tb.plot, "group", ymax = 200)
enhCount
Binary Group
## enhCount
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$enhCount
  distance2 <- (data %>% dplyr::filter(group == group2) )$enhCount
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_enhCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = enhCount)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("enhCount_", note)
  height <- 3
  width <- 1.5
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_enhCount(gene.TSS.tb.plot, "group_binaryGroup", ymax = 200)
Group
## enhCount
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$enhCount
  distance2 <- (data %>% dplyr::filter(group == group2) )$enhCount
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_enhCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = enhCount)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("enhCount_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_enhCount(gene.TSS.tb.plot, "group", ymax = 200)
geneDensity
binaryGroup
## geneDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$geneDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$geneDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_geneDensity <- function(temp.tb, note, ymin = 0, ymax = 10){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = ".", fill = group, y = geneDensity)) +
    # Set axis labels (no x-axis title)
    labs(x = NULL, y = "Gene density within TAD") +
    scale_fill_manual(values = (rev(c("#777777","#F28E2C")))) +
    # Violin plot with black outline, customized line width and end
    introdataviz::geom_split_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square",
                                    alpha = .4
    ) +
    
    # Box plot with customized line width, square end, and outlier size
    geom_boxplot(
      width = 0.3, color = "black",
      linewidth = lineMedium * mmToLineUnit, lineend = "square",
      outlier.shape = NA,  alpha = 0.6, show.legend = FALSE
    ) +
    
    # Mean point in each group
    stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) +
    
    # Horizontal line at y = 0
    geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
    
    # Coordinate limits and custom y-axis labels
    coord_cartesian(ylim = c(ymin, ymax)) +
    
    # Annotate p-value text
    annotate(
      "text", x = 1, y = ymin + 1,
      label = paste0("p12: ", convPvalue(p12)),
      color = "black", hjust = 0, size = 3
    ) +
    
    # Theme customization
    theme_classic() +
    theme(
      axis.title = element_text(
        size = fontSizeM, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 45, hjust = 1, vjust = 1
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)
    )+   
    guides(
      fill = guide_legend(
        keywidth = 0.2,  # Adjust the width of the legend keys
        keyheight = 0.2  # Adjust the height of the legend keys
      ))
  
  
  
  
  
  fileName <- paste0("geneDensity_", note)
  width <- panelSize(1.5)*mmToInch
  height <- panelSize(1.2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_geneDensity(gene.TSS.tb.plot, "group_binaryGroup", ymax = 8)
Group
## geneDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$geneDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$geneDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_geneDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = geneDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("geneDensity_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_geneDensity(gene.TSS.tb.plot, "group", ymax = 10)
enhDensity
Binary group
## enhDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$enhDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$enhDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_enhDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = enhDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("enhDensity_", note)
  height <- 3
  width <- 1.5
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_enhDensity(gene.TSS.tb.plot, "group_binaryGroup", ymax = 10)
Group
## enhDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$enhDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$enhDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_enhDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = enhDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("enhDensity_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_enhDensity(gene.TSS.tb.plot, "group", ymax = 10)
regdensity
## regDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$regDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$regDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_regDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = regDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("regDensity_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_regDensity(gene.TSS.tb.plot, "group", ymax = 10)

Checking TAD average log2FC

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj)

temp <- left_join(gene.TSS.tb, diff.RNA, by = c("ensembl" = "ensembl_gene_id")) %>% dplyr::filter(!is.na(log2FoldChange),
                                                                                                  !is.na(TAD))


data <- temp %>% group_by(TAD) %>%
  dplyr::summarise(absAvgLog2FC = abs(mean(log2FoldChange)),
                   avgLog2FC = mean(log2FoldChange),
                   geneDensity = mean(geneDensity),
                   enhDensity = mean(enhDensity),
                   regDensity = mean(regDensity))

data$geneDensityGroup <- cut(
  data$geneDensity,
  breaks = quantile(data$geneDensity, probs = seq(0, 1, 0.2), na.rm = TRUE),
  include.lowest = TRUE,
  labels = paste0(seq(0, 80, 20), "-", seq(20, 100, 20), "%")
)

data$enhDensityGroup <- cut(
  data$enhDensity,
  breaks = quantile(data$enhDensity, probs = seq(0, 1, 0.2), na.rm = TRUE),
  include.lowest = TRUE,
  labels = paste0(seq(0, 80, 20), "-", seq(20, 100, 20), "%")
)


### Ploting  grouping

p <- ggplot(data, aes(x = enhDensityGroup, y = enhDensity, fill = enhDensityGroup)) + 
  geom_boxplot(color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  coord_cartesian(ylim = c(0, quantile(data$enhDensity, 0.99))) +
  labs(x = "Enhancer Density Group" , y = "TAD enhancer density") +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    legend.position = "none",
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- paste0("tadGroup_enhDensity")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()




p <- ggplot(data, aes(x = geneDensityGroup, y = geneDensity, fill = geneDensityGroup)) + 
  geom_boxplot(color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  coord_cartesian(ylim = c(0, quantile(data$geneDensity, 0.99))) +
  labs(x = "Gene Density Group" , y = "Gene density") +
  scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
  theme(
    legend.position = "none",
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )
fileName <- paste0("tadGroup_geneDensity")
width <- 31*mmToInch
height <- 38*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


### Plotting distribution - enh
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(enhDensityGroup ==group1) )$avgLog2FC
  distance2 <- (data %>% dplyr::filter(enhDensityGroup ==group2) )$avgLog2FC
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


#### ALL
# ylim <- max(abs(quantile(data$avgLog2FC, 0.01)), abs(quantile(data$avgLog2FC, 0.99)))
# p <- ggplot(data, aes(x = enhDensityGroup, y = avgLog2FC)) + geom_violin(aes(fill = enhDensityGroup))+ 
#   geom_boxplot(outlier.shape = NA, width = 0.1) + theme_classic() +
#   coord_cartesian(ylim = c(-ylim, ylim)) +
#   geom_hline(yintercept = 0) +   theme(legend.position = "none") +
#   scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
# 
# fileName <- paste0("tadGroup_enhDensity_avgLog2FC")
# height <- 3
# width <- 3
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()
#### UP
data.plus <- data %>% dplyr::filter(avgLog2FC > 0)
ps01 <- round(getPvalWilcox(data.plus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.plus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.plus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.plus, "60-80%", "80-100%"), 5)



p <- ggplot(data.plus, aes(x = enhDensityGroup, y = avgLog2FC)) + 
  geom_violin(aes(fill = enhDensityGroup),
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE)+ 
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  labs(x = "Enhancer density group" , y = "avg log2(fold change)") +
  stat_summary(
    aes(group = enhDensityGroup), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+ coord_cartesian(ylim = c(0, 1.5)) +
  annotate("text", x = 1, y = 1, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n"), 
           color = "black", hjust = 0, size = 2)


fileName <- paste0("tadGroup_enhDensity_avgLog2FC_up")
width <- panelSize(1.15)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


#### DOWN
data.minus <- data %>% dplyr::filter(avgLog2FC < 0)
ps01 <- round(getPvalWilcox(data.minus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.minus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.minus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.minus, "60-80%", "80-100%"), 5)


p <- ggplot(data.minus, aes(x = enhDensityGroup, y = avgLog2FC)) + 
  geom_violin(aes(fill = enhDensityGroup),
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE)+ 
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  labs(x = "Enhancer density group" , y = "avg log2(fold change)") +
  stat_summary(
    aes(group = enhDensityGroup), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+ coord_cartesian(ylim = c(-1.5, 0))+
  annotate("text", x = 1, y = -1, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n"), 
           color = "black", hjust = 0, size = 2)

fileName <- paste0("tadGroup_enhDensity_avgLog2FC_down")
width <- panelSize(1.175)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



### Plotting distribution - gene
#### ALL
# ylim <- max(abs(quantile(data$avgLog2FC, 0.01)), abs(quantile(data$avgLog2FC, 0.99)))
# p <- ggplot(data, aes(x = geneDensityGroup, y = avgLog2FC)) + geom_violin(aes(fill = geneDensityGroup))+ 
#   geom_boxplot(outlier.shape = NA, width = 0.1) + theme_classic() +
#   coord_cartesian(ylim = c(-ylim, ylim)) +
#   geom_hline(yintercept = 0) +   theme(legend.position = "none") +
#   scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
# 
# fileName <- paste0("tadGroup_geneDensity_avgLog2FC")
# height <- 3
# width <- 3
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(geneDensityGroup ==group1) )$avgLog2FC
  distance2 <- (data %>% dplyr::filter(geneDensityGroup ==group2) )$avgLog2FC
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


data.plus <- data %>% dplyr::filter(avgLog2FC > 0)
ps01 <- round(getPvalWilcox(data.plus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.plus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.plus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.plus, "60-80%", "80-100%"), 5)



p <- ggplot(data.plus, aes(x = geneDensityGroup, y = avgLog2FC)) + 
  geom_violin(aes(fill = geneDensityGroup),
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE)+ 
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  labs(x = "Gene density group" , y = "avg log2(fold change)") +
  stat_summary(
    aes(group = geneDensityGroup), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+ coord_cartesian(ylim = c(0, 1.5)) +
  annotate("text", x = 1, y = 1, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n"), 
           color = "black", hjust = 0, size = 2)


fileName <- paste0("tadGroup_geneDensity_avgLog2FC_up")
width <- panelSize(1.15)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


#### DOWN
data.minus <- data %>% dplyr::filter(avgLog2FC < 0)
ps01 <- round(getPvalWilcox(data.minus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.minus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.minus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.minus, "60-80%", "80-100%"), 5)


p <- ggplot(data.minus, aes(x = geneDensityGroup, y = avgLog2FC)) + 
  geom_violin(aes(fill = geneDensityGroup),
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE)+ 
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  labs(x = "Gene density group" , y = "avg log2(fold change)") +
  stat_summary(
    aes(group = geneDensityGroup), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+ coord_cartesian(ylim = c(-1.5, 0))+
  annotate("text", x = 1, y = -1, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n"), 
           color = "black", hjust = 0, size = 2)

fileName <- paste0("tadGroup_geneDensity_avgLog2FC_down")
width <- panelSize(1.175)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


### CDF plot

ks_result <- ks.test(
  data %>% dplyr::filter(geneDensityGroup == "0-20%") %>% pull(absAvgLog2FC),
  data %>% dplyr::filter(geneDensityGroup == "20-40%") %>% pull(absAvgLog2FC)
)


p <- ggplot(data, aes(x = absAvgLog2FC, color = geneDensityGroup)) +
  scale_color_manual(values = rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS),
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))


fileName <- paste0("tadGroup_geneDensity_avgLog2FC_cdf")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

data <- data.minus

group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "0-20%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "20-40%"]
ks.test(group1_data, group2_data)

group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "20-40%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "40-60%"]
ks.test(group1_data, group2_data)

group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "40-60%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "60-80%"]
ks.test(group1_data, group2_data)

group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "60-80%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "80-100%"]
ks.test(group1_data, group2_data)

[2.18] Comparing TAD insulation score across samples

Checking how insulation score changes by treatment at DMSO TAD boundaries

name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import insulation score calculated with python
resultDir <- here("../../result")
insScore.DMSO <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1DMSO.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.DMSO) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.dTAG <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1dTAG.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.dTAG) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.A485 <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1A485.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.A485) <- c("chrom", "start", "end", "insulationScore", "binID")

getInsulationScore <- function(chr, coordinate, insScore.tb){
  temp.tb <- insScore.tb %>% dplyr::filter(chrom == chr, start < coordinate, end > coordinate)
  out <- temp.tb$insulationScore
  if(length(out) < 1){
    return(NA)
  }else{
    return(temp.tb$insulationScore)
  }
}

# Importing TAD boundaries
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% rowwise() %>%
  dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                center = (start + end)/2,
                insScore_DMSO = getInsulationScore(chr, center, insScore.DMSO),
                insScore_dTAG = getInsulationScore(chr, center, insScore.dTAG),
                insScore_A485 = getInsulationScore(chr, center, insScore.A485))

tad_boundary <- tad_boundary %>% filter(!if_any(everything(), is.na))

tad_boundary$density <- get_density(tad_boundary$insScore_DMSO, tad_boundary$insScore_dTAG, n = 100)
tad_boundary <- tad_boundary %>% dplyr::arrange(density)
ggplot(tad_boundary, aes(x = insScore_DMSO, y = insScore_dTAG, color = density)) + 
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() + coord_fixed() +
  geom_abline(slope = 1, intercept = 0) + theme_bw()



tad_boundary$density <- get_density(tad_boundary$insScore_DMSO, tad_boundary$insScore_A485, n = 100)
tad_boundary <- tad_boundary %>% dplyr::arrange(density)
ggplot(tad_boundary, aes(x = insScore_DMSO, y = insScore_A485, color = density)) + 
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() + coord_fixed() +
  geom_abline(slope = 1, intercept = 0) + theme_bw()

[2.20] Checking how many loops cross the boundary?

Previously, I tried to check the percentage of loops crossing the boundary per group which wasn’t fruitful. This time, try to do this on the differential loops. #### dTAG

### Importing differential regulatory loops

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "UP/NO")
colnames(loop.up) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "UP/NO")
colnames(loop.no) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "DOWN")
colnames(loop.down) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")

loops <- bind_rows(bind_rows(loop.up, loop.no), loop.down)

### Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               start = start -175000/2,
                                               end = end + 175000/2,
                                               center = (start + end)/2)
### Functions
checkBoundaryCross <- function(chrom1, start1, end2, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom1,
                                 center > start1,
                                 center < end2)
  return(length(temp$center))
}

### Check overlap
loops <- loops %>% rowwise() %>%
  dplyr::mutate(boundaryCross = checkBoundaryCross(chr1, start1, end2, tad_boundary))

### Plotting
summary_data <- loops %>%
  group_by(group) %>%
  summarise(percentage = mean(boundaryCross > 0) * 100)

summary_data$group <- factor(summary_data$group, levels = c("UP/NO", "DOWN"))

p <- ggplot(summary_data, aes(x = group, y = percentage)) +
  geom_bar(stat = "identity", fill = darken(strong_green, amount = 0.2)) +
  labs(y = "% of loops crossing TAD boundary", x = NULL) +
  theme_classic() + ylim(0, 100) +
  theme(
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
  )

fileName <- paste0("boundaryCrossRatio")
width <- panelSize(1)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

A485

### Importing differential regulatory loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "up")
colnames(loop.up) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "no")
colnames(loop.no) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "down")
colnames(loop.down) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")

loops <- bind_rows(bind_rows(loop.up, loop.no), loop.down)

### Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               start = start -175000/2,
                                               end = end + 175000/2,
                                               center = (start + end)/2)
### Functions
checkBoundaryCross <- function(chrom1, start1, end2, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom1,
                                 center > start1,
                                 center < end2)
  return(length(temp$center))
}

### Check overlap
loops <- loops %>% rowwise() %>%
  dplyr::mutate(boundaryCross = checkBoundaryCross(chr1, start1, end2, tad_boundary))

### Plotting
summary_data <- loops %>%
  group_by(group) %>%
  summarise(percentage = mean(boundaryCross > 0) * 100)


p <- ggplot(summary_data, aes(x = group, y = percentage)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Percentage of Rows with boundaryCross > 0 per Group",
       x = "Group",
       y = "Percentage") +
  theme_classic() + ylim(0, 100)

fileName <- paste0("boundaryCrossRatio_A485")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

[2.21] LOLA on differential loop anchors

LOADING LOLA
# LOADING LOLA DB
library("simpleCache")
library("LOLA")
lolaDB = loadRegionDB("/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/reference/LOLACore_cached/mm10")

# FUNCTIONS
extractAnchor <- function(loop){
  anchor1 <- loop %>% dplyr::select(c(1, 2, 3))
  colnames(anchor1) <- c("chr", "start", "end")
  anchor2 <- loop %>% dplyr::select(c(4, 5, 6))
  colnames(anchor2) <- c("chr", "start", "end")
  anchors <- reduce(makeGRangesFromDataFrame(bind_rows(anchor1, anchor2)))
    return(anchors)
}

dTAG, Comparing to Async

- Limited to ATAC-seq signal, reg loop backgroupd
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])



loop.1 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothRetained.bedpe"))
anchor.1 <- (extractAnchor(loop.1))
overlaps <- findOverlaps(anchor.1, atac.gr)
anchor.1 <- pintersect(anchor.1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.2 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_AsyncSpecificPert.bedpe"))
anchor.2 <- (extractAnchor(loop.2))
overlaps <- findOverlaps(anchor.2, atac.gr)
anchor.2 <- pintersect(anchor.2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.3 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_G1SpecificPert.bedpe"))
anchor.3 <- (extractAnchor(loop.3))
overlaps <- findOverlaps(anchor.3, atac.gr)
anchor.3 <- pintersect(anchor.3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.4 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothPert.bedpe"))
anchor.4 <- (extractAnchor(loop.4))
overlaps <- findOverlaps(anchor.4, atac.gr)
anchor.4 <- pintersect(anchor.4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


anchors <- list(anchor.1, anchor.2, anchor.3, anchor.4)
tbs <- list()
temps <- list()

# Process clusters c1 to c8
for (i in 1:4) {
  
  anchor <- anchors[[i]]
  # Run LOLA
  result <- runLOLA(anchor, anchor.all, lolaDB)
  tb <- as_tibble(result)
  
  # Filter and summarize
  tb <- tb %>%
    dplyr::mutate(target = toupper(antibody)) %>%
    filter(str_to_lower(cellType) == "embryonic stem cell") %>%
    dplyr::filter(qValue < alpha) %>%
    dplyr::group_by(target) %>%
    slice_min(meanRnk, with_ties = FALSE)
  
  # Store tb
  tbs[[i]] <- tb
  
  # Select and rename oddsRatio
  temp <- tb %>% dplyr::select(target, oddsRatio)
  colnames(temp) <- c("target", paste0("OR_c", i))
  
  # Store temp
  temps[[i]] <- temp
}

# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
  mutate_all(~replace_na(., 1))
colnames(temp) <- c("target", "bothRetained", "AsyncSpecificPerturb", "G1SpecificPerturb", "bothPerturb")
data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

dTAG

- Limiting to ATAC-seq signal, reg loop background
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
overlaps <- findOverlaps(anchor.upno, atac.gr)
anchor.upno <- pintersect(anchor.upno[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv"), sep = "\t")

# UPNO
result = runLOLA(anchor.upno, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv"), sep = "\t")


### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

# p <- Heatmap(
#   data,
#   name = "Odds Ratio",                   # Name of the heatmap legend
#   cluster_columns = FALSE,            # Remove column dendrogram
#   row_km = 4,                         # Define the number of k-means clusters for rows (adjust as needed)
#   show_row_dend = FALSE,
#   col = col_fun                       # Use the red gradient color scale
# )
# 
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_regAnchorBackground_atac")
# height <- 7
# width <- 3.5
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()
# 


### Visualizing p-value and OR
alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "UP") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "NO") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)

tb.upno <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log10(qValue),
                group = "UP/NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.upno %>% dplyr::select(c(24, 16, 20,
                                   4, 25, 5,
                                   7, 8, 9, 10, 11,
                                   12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac_pub.tsv"), sep = "\t")

tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log10(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.down %>% dplyr::select(c(24, 16, 20,
                                   4, 25, 5,
                                   7, 8, 9, 10, 11,
                                   12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac_pub.tsv"), sep = "\t")


temp.upno <- tb.upno %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.upno, temp.down)

# order <- c((temp %>% dplyr::filter(group == "UP/NO") %>% arrange(desc(oddsRatio)))$target, 
#            (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)

# temp$target <- factor(temp$target, levels = rev(order))
temp$group <- factor(temp$group, levels = c("UP/NO", "DOWN"))

targetList <- c("POLR2A", "CTR9",
                "AFF4", "ELL2",
                "MED1", "MED12",
                "TBP", "TAF1", "TAF3",
                "E2F1", "YY1", "NIPBL", 
                "EP300", "DPY30", "SETDB1",
                "RAD21", "SMC1A", "SMC3", "CTCF",
                "SUZ12", "PHF19"
)
temp <- temp %>% dplyr::filter(target %in% targetList)
temp$target <- factor(temp$target, levels = rev(targetList))


qValueLogMax <- 50
temp2 <- temp %>% dplyr::mutate(qValueLog = min(qValueLog, qValueLogMax))

p <- ggplot(temp2, aes(x = group, y = target, fill = oddsRatio, size = qValueLog)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )
# p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
#   geom_point() + theme_bw() + 
#   scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
#   scale_color_gradient(low = "blue", high = "red",
#                        guide = guide_colorbar(
#                          barwidth = 1.5/5.08,  # Adjust width of the color bar
#                          barheight = 15/5.08    # Adjust height of the color bar
#                        )) +
#   labs(x = NULL, y = NULL)  +
#   theme(
#     axis.title = element_text(
#       size = fontSizeS,
#       family = fontType,
#       color = "#000000"
#     ),
#     axis.text = element_text(
#       size = fontSizeS,
#       family = fontType,
#       color = "#000000"
#     ),
#     axis.text.x = element_text(
#       angle = 45,      # Rotate x-axis labels 45 degrees
#       hjust = 1,       # Adjust horizontal justification
#       vjust = 1        # Adjust vertical justification
#     ),
#     axis.line = element_line(
#       color = "#000000",
#       size = lineThick*mmToLineUnit,
#       lineend = "square"
#     ),
#     axis.ticks = element_line(
#       color = "#000000",
#       size = lineThick*mmToLineUnit,
#       lineend = "square"
#     ),
#     panel.background = element_rect(fill = "transparent"),
#     legend.text = element_text(family = fontType, size = fontSizeS),
#     legend.title = element_text(family = fontType, size = fontSizeS)
#   )

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac_upno")
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.9)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
### 241015 Testing on subset of loops
anchor.c1 <- extractAnchor(loop.cluster1)
overlaps <- findOverlaps(anchor.c1, atac.gr)
anchor.c1 <- pintersect(anchor.c1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c2 <- extractAnchor(loop.cluster2)
overlaps <- findOverlaps(anchor.c2, atac.gr)
anchor.c2 <- pintersect(anchor.c2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c3 <- extractAnchor(loop.cluster3)
overlaps <- findOverlaps(anchor.c3, atac.gr)
anchor.c3 <- pintersect(anchor.c3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c4 <- extractAnchor(loop.cluster4)
overlaps <- findOverlaps(anchor.c4, atac.gr)
anchor.c4 <- pintersect(anchor.c4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c5 <- extractAnchor(loop.cluster5)
overlaps <- findOverlaps(anchor.c5, atac.gr)
anchor.c5 <- pintersect(anchor.c5[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c6 <- extractAnchor(loop.cluster6)
overlaps <- findOverlaps(anchor.c6, atac.gr)
anchor.c6 <- pintersect(anchor.c6[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c7 <- extractAnchor(loop.cluster7)
overlaps <- findOverlaps(anchor.c7, atac.gr)
anchor.c7 <- pintersect(anchor.c7[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c8 <- extractAnchor(loop.cluster8)
overlaps <- findOverlaps(anchor.c8, atac.gr)
anchor.c8 <- pintersect(anchor.c8[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

result = runLOLA(anchor.c1, anchor.all, lolaDB)
tb.c1 = as_tibble(result)

result = runLOLA(anchor.c2, anchor.all, lolaDB)
tb.c2 = as_tibble(result)

result = runLOLA(anchor.c3, anchor.all, lolaDB)
tb.c3 = as_tibble(result)

result = runLOLA(anchor.c4, anchor.all, lolaDB)
tb.c4 = as_tibble(result)
result = runLOLA(anchor.c5, anchor.all, lolaDB)
tb.c5 = as_tibble(result)

result = runLOLA(anchor.c6, anchor.all, lolaDB)
tb.c6 = as_tibble(result)

result = runLOLA(anchor.c7, anchor.all, lolaDB)
tb.c7 = as_tibble(result)

result = runLOLA(anchor.c8, anchor.all, lolaDB)
tb.c8 = as_tibble(result)

### HEATMAP
alpha <- 0.05
tb.c1 <- tb.c1 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c2 <- tb.c2 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c3 <- tb.c3 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c4 <- tb.c4 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c5 <- tb.c5 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c6 <- tb.c6 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c7 <- tb.c7 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c8 <- tb.c8 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.c1 <- tb.c1 %>% dplyr::select(target, oddsRatio)
colnames(temp.c1) <- c("target", "OR_c1")
temp.c2 <- tb.c2 %>% dplyr::select(target, oddsRatio)
colnames(temp.c2) <- c("target", "OR_c2")
temp.c3 <- tb.c3 %>% dplyr::select(target, oddsRatio)
colnames(temp.c3) <- c("target", "OR_c3")
temp.c4 <- tb.c4 %>% dplyr::select(target, oddsRatio)
colnames(temp.c4) <- c("target", "OR_c4")
temp.c5 <- tb.c5 %>% dplyr::select(target, oddsRatio)
colnames(temp.c5) <- c("target", "OR_c5")
temp.c6 <- tb.c6 %>% dplyr::select(target, oddsRatio)
colnames(temp.c6) <- c("target", "OR_c6")
temp.c7 <- tb.c7 %>% dplyr::select(target, oddsRatio)
colnames(temp.c7) <- c("target", "OR_c7")
temp.c8 <- tb.c8 %>% dplyr::select(target, oddsRatio)
colnames(temp.c8) <- c("target", "OR_c8")

temp <- full_join(temp.c1, temp.c2, by = "target") %>%
  full_join(temp.c3, by = "target") %>%
  full_join(temp.c4, by = "target") %>%
  full_join(temp.c5, by = "target") %>%
  full_join(temp.c6, by = "target") %>%
  full_join(temp.c7, by = "target") %>%
  full_join(temp.c8, by = "target") %>%
  mutate_all(~replace_na(., 1))

data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)


# Initialize lists to store results
anchors <- list()
tbs <- list()
temps <- list()

# Process clusters c1 to c8
for (i in 1:4) {
  # Extract anchor
  loop_cluster <- get(paste0("loop.cluster", i))
  anchor <- extractAnchor(loop_cluster)
  
  # Find overlaps and intersect
  overlaps <- findOverlaps(anchor, atac.gr)
  anchor <- pintersect(anchor[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
  
  # Store anchor
  anchors[[i]] <- anchor
  
  # Run LOLA
  result <- runLOLA(anchor, anchor.all, lolaDB)
  tb <- as_tibble(result)
  
  # Filter and summarize
  tb <- tb %>%
    dplyr::mutate(target = toupper(antibody)) %>%
    filter(str_to_lower(cellType) == "embryonic stem cell") %>%
    dplyr::filter(qValue < alpha) %>%
    dplyr::group_by(target) %>%
    slice_min(meanRnk, with_ties = FALSE)
  
  # Store tb
  tbs[[i]] <- tb
  
  # Select and rename oddsRatio
  temp <- tb %>% dplyr::select(target, oddsRatio)
  colnames(temp) <- c("target", paste0("OR_c", i))
  
  # Store temp
  temps[[i]] <- temp
}

# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
  mutate_all(~replace_na(., 1))
data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)
- Limited to ATAC-seq signal, all loop backgroup
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
overlaps <- findOverlaps(anchor.upno, atac.gr)
anchor.upno <- pintersect(anchor.upno[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loopNum <- nrow(loop.upno)

# Selecting same number of extreme down loops
loop.down <- loop.all %>% dplyr::filter(diff_dTAG_DMSO < -0.2) %>% dplyr::arrange(diff_dTAG_DMSO) %>% slice_head(n = loopNum)
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv"), sep = "\t")

# UPNO
result = runLOLA(anchor.upno, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac.tsv"), sep = "\t")


# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv"), sep = "\t")



### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

# p <- Heatmap(
#   data,
#   name = "Odds Ratio",                   # Name of the heatmap legend
#   cluster_columns = FALSE,            # Remove column dendrogram
#   row_km = 10,                         # Define the number of k-means clusters for rows (adjust as needed)
#   show_row_dend = FALSE,
#   col = col_fun,
#   border = TRUE
# )
# 
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_allAnchorBackground_atac")
# height <- 7
# width <- 3.5
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()




### Visualizing p-value and OR
alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "UP") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "NO") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
tb.upno <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log10(qValue),
                group = "UP/NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.upno %>% dplyr::select(c(24, 16, 20,
                                   4, 25, 5,
                                   7, 8, 9, 10, 11,
                                   12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac_pub.tsv"), sep = "\t")
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log10(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.down %>% dplyr::select(c(24, 16, 20,
                                   4, 25, 5,
                                   7, 8, 9, 10, 11,
                                   12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac_pub.tsv"), sep = "\t")

# temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.upno <- tb.upno %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.upno, temp.down)
# 
# order <- c((temp %>% dplyr::filter(group == "UP/NO") %>% arrange(desc(oddsRatio)))$target, 
#            (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
# temp$target <- factor(temp$target, levels = rev(order))
temp$group <- factor(temp$group, levels = c("UP/NO", "DOWN"))


targetList <- c("POLR2A", "CTR9",
                "AFF4", "ELL2",
                "MED1", "MED12",
                "TBP", "TAF1", "TAF3",
                "ESRRB", "KLF4", "NANOG", "POU5F1", "SOX2", "STAT3", "E2F1", "YY1",
                "EP300", "DPY30", "EZH2", "KDM2B", "KDB4B", "KDM4C", "KDM6B", "RBBP5",
                "RAD21", "SMC1A", "SMC3", "CTCF",
                "JARID2", "SUZ12"
)
temp <- temp %>% dplyr::filter(target %in% targetList)

temp$target <- factor(temp$target, levels = rev(targetList))
# MAX qValueLog to 50
qValueLogMax <- 50
temp2 <- temp %>% dplyr::mutate(qValueLog = min(qValueLog, qValueLogMax))
p <- ggplot(temp2, aes(x = group, y = target, fill = oddsRatio, size = qValueLog)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 0.5*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_OR_qValue_allAnchorBackground_atac_ordered")
width <- panelSize(1.5)*mmToInch
height <- panelSize(2.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

dTAG, obsexp

- Limited to ATAC-seq signal, all loop backgroup
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_UP_diff0.5.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_NO_diff0.5.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_DOWN_diff0.5.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv"), sep = "\t")



### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 4,                        # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_allLoops_allAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


############
# 
# ### Visualizing p-value and OR
# alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "UP") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "NO") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "DOWN") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# 
# temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)
# 
# temp <- bind_rows(temp.up, temp.down)
# 
# order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target, 
#            (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
# temp$target <- factor(temp$target, levels = rev(order))
# p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
#   geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
#   labs(x = NULL, y = NULL)  +   scale_size_continuous(range = c(1, 3)) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_allLoops_OR_allAnchorBackground_atac")
# height <-3
# width <- 2
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()
- Limiting to ATAC-seq signal, reg loop background
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_UP_diff0.5.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_NO_diff0.5.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_DOWN_diff0.5.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_pe-peLoops_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_pe-peLoops_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_pe-peLoops_atac.tsv"), sep = "\t")


### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_pe-peLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_pe-peLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_pe-peLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 4,                         # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun                       # Use the red gradient color scale
)

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_pe-peLoops_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "UP") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.up, temp.down)

order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target, 
           (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
  geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
  labs(x = NULL, y = NULL)  +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()





### 241015 Testing on subset of loops
anchor.c1 <- extractAnchor(loop.cluster1)
overlaps <- findOverlaps(anchor.c1, atac.gr)
anchor.c1 <- pintersect(anchor.c1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c2 <- extractAnchor(loop.cluster2)
overlaps <- findOverlaps(anchor.c2, atac.gr)
anchor.c2 <- pintersect(anchor.c2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c3 <- extractAnchor(loop.cluster3)
overlaps <- findOverlaps(anchor.c3, atac.gr)
anchor.c3 <- pintersect(anchor.c3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c4 <- extractAnchor(loop.cluster4)
overlaps <- findOverlaps(anchor.c4, atac.gr)
anchor.c4 <- pintersect(anchor.c4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c5 <- extractAnchor(loop.cluster5)
overlaps <- findOverlaps(anchor.c5, atac.gr)
anchor.c5 <- pintersect(anchor.c5[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c6 <- extractAnchor(loop.cluster6)
overlaps <- findOverlaps(anchor.c6, atac.gr)
anchor.c6 <- pintersect(anchor.c6[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c7 <- extractAnchor(loop.cluster7)
overlaps <- findOverlaps(anchor.c7, atac.gr)
anchor.c7 <- pintersect(anchor.c7[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c8 <- extractAnchor(loop.cluster8)
overlaps <- findOverlaps(anchor.c8, atac.gr)
anchor.c8 <- pintersect(anchor.c8[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

result = runLOLA(anchor.c1, anchor.all, lolaDB)
tb.c1 = as_tibble(result)

result = runLOLA(anchor.c2, anchor.all, lolaDB)
tb.c2 = as_tibble(result)

result = runLOLA(anchor.c3, anchor.all, lolaDB)
tb.c3 = as_tibble(result)

result = runLOLA(anchor.c4, anchor.all, lolaDB)
tb.c4 = as_tibble(result)
result = runLOLA(anchor.c5, anchor.all, lolaDB)
tb.c5 = as_tibble(result)

result = runLOLA(anchor.c6, anchor.all, lolaDB)
tb.c6 = as_tibble(result)

result = runLOLA(anchor.c7, anchor.all, lolaDB)
tb.c7 = as_tibble(result)

result = runLOLA(anchor.c8, anchor.all, lolaDB)
tb.c8 = as_tibble(result)

### HEATMAP
alpha <- 0.05
tb.c1 <- tb.c1 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c2 <- tb.c2 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c3 <- tb.c3 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c4 <- tb.c4 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c5 <- tb.c5 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c6 <- tb.c6 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c7 <- tb.c7 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c8 <- tb.c8 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.c1 <- tb.c1 %>% dplyr::select(target, oddsRatio)
colnames(temp.c1) <- c("target", "OR_c1")
temp.c2 <- tb.c2 %>% dplyr::select(target, oddsRatio)
colnames(temp.c2) <- c("target", "OR_c2")
temp.c3 <- tb.c3 %>% dplyr::select(target, oddsRatio)
colnames(temp.c3) <- c("target", "OR_c3")
temp.c4 <- tb.c4 %>% dplyr::select(target, oddsRatio)
colnames(temp.c4) <- c("target", "OR_c4")
temp.c5 <- tb.c5 %>% dplyr::select(target, oddsRatio)
colnames(temp.c5) <- c("target", "OR_c5")
temp.c6 <- tb.c6 %>% dplyr::select(target, oddsRatio)
colnames(temp.c6) <- c("target", "OR_c6")
temp.c7 <- tb.c7 %>% dplyr::select(target, oddsRatio)
colnames(temp.c7) <- c("target", "OR_c7")
temp.c8 <- tb.c8 %>% dplyr::select(target, oddsRatio)
colnames(temp.c8) <- c("target", "OR_c8")

temp <- full_join(temp.c1, temp.c2, by = "target") %>%
  full_join(temp.c3, by = "target") %>%
  full_join(temp.c4, by = "target") %>%
  full_join(temp.c5, by = "target") %>%
  full_join(temp.c6, by = "target") %>%
  full_join(temp.c7, by = "target") %>%
  full_join(temp.c8, by = "target") %>%
  mutate_all(~replace_na(., 1))

data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)


# Initialize lists to store results
anchors <- list()
tbs <- list()
temps <- list()

# Process clusters c1 to c8
for (i in 1:4) {
  # Extract anchor
  loop_cluster <- get(paste0("loop.cluster", i))
  anchor <- extractAnchor(loop_cluster)
  
  # Find overlaps and intersect
  overlaps <- findOverlaps(anchor, atac.gr)
  anchor <- pintersect(anchor[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
  
  # Store anchor
  anchors[[i]] <- anchor
  
  # Run LOLA
  result <- runLOLA(anchor, anchor.all, lolaDB)
  tb <- as_tibble(result)
  
  # Filter and summarize
  tb <- tb %>%
    dplyr::mutate(target = toupper(antibody)) %>%
    filter(str_to_lower(cellType) == "embryonic stem cell") %>%
    dplyr::filter(qValue < alpha) %>%
    dplyr::group_by(target) %>%
    slice_min(meanRnk, with_ties = FALSE)
  
  # Store tb
  tbs[[i]] <- tb
  
  # Select and rename oddsRatio
  temp <- tb %>% dplyr::select(target, oddsRatio)
  colnames(temp) <- c("target", paste0("OR_c", i))
  
  # Store temp
  temps[[i]] <- temp
}

# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
  mutate_all(~replace_na(., 1))
data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

A485

- Limited to ATAC-seq signal, all loop backgroupd
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loopNum <- nrow(loop.down)

# Selecting same number of extreme no loops
loop.no <- loop.all %>% 
  dplyr::mutate(absDiff = abs(diff_A485_DMSO)) %>%
  dplyr::filter(absDiff < 0.2) %>%
  dplyr::arrange(absDiff) %>% slice_head(n = loopNum)
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv"), sep = "\t")



### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 5,                         # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_allLoops_extreme_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()




### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "UP") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.up, temp.down)

order <- unique(c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target, 
           (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target))
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
  geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
  labs(x = NULL, y = NULL)  +   scale_size_continuous(range = c(1, 3)) +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_allLoops_extreme_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
- Limited to ATAC-seq signal, reg loop backgrouund
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv"), sep = "\t")



### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 4,                         # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun                       # Use the red gradient color scale
)

fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "UP") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.up, temp.down)

order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target, 
           (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
  geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
  labs(x = NULL, y = NULL)  +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

[2.22] Comparing absolute RNA expression level across group

The aim is to see if there is a trend in RNA expression level among group 1, 2, 3, 4. It would be making comparison among genes. For this, TPM should be used.

## Importing groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene

## Importing RNA-seq TPM for DMSO
tpm.selected <- fread(here(refDir, "readCount.filtered.TPM.all.tsv")) %>% dplyr::select(1, 3, 4, 5)
colnames(tpm.selected) <- c("ensembl", "rep1", "rep2", "rep3")

tpm.selected <- tpm.selected %>% dplyr::rowwise() %>%
  dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group)) %>%
  dplyr::mutate(avgTPM = mean(rep1, rep2, rep3))


ggplot(tpm.selected, aes(x = group, y = avgTPM)) +
  geom_violin(aes(fill = group), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + 
  scale_y_log10() +
  theme_classic()



  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()

[2.23] Checking Early gene from Bobbie

dTAG

## Importing groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


## Importing Bobbie gene classification
geneCluster <- fread(here(refDir, "bobbie_gene_classification.csv")) %>%
  dplyr::select(gene, enst, Cluster)
colnames(geneCluster) <- c("gene", "ensembl_transcript_id", "cluster")


## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
                 filters = "ensembl_transcript_id",
                 values = geneCluster$ensembl_transcript,
                 mart = ensembl.v102)
geneCluster <- geneCluster %>% dplyr::left_join(idPair_tg, by = c("ensembl_transcript_id"))

# Making data for stacked barplot
countGene <- function(geneCluster, groupName, clusterName){
  num <- nrow(geneCluster %>% dplyr::filter(ensembl_gene_id %in% groupName,
                                            cluster %in% clusterName))
  return(num)
}
countGeneList <- function(geneCluster, groupName){
  n1 <- countGene(geneCluster, groupName, "Early")
  n2 <- countGene(geneCluster, groupName, "Middle")
  n3 <-countGene(geneCluster, groupName, "Late")
  n4 <- countGene(geneCluster, groupName, "Transient")
  return(c(n1, n2, n3, n4))
}

group <- c(rep("group1", 4), rep("group2", 4), rep("group3", 4), rep("group4", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 4)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, group1),
           countGeneList(geneCluster, group2),
           countGeneList(geneCluster, group5),
           countGeneList(geneCluster, group8))

data <- data.frame(group, cluster, value)


# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()


# Statistical analysis 
# Similar to the Chi-Square test, Fisher’s Exact Test is used when you have smaller sample sizes or when the expected frequency in any cell of the contingency table is below 5.
temp <- data %>% dplyr::filter(group %in% c("group1", "group2"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

temp <- data %>% dplyr::filter(group %in% c("group1", "group3"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

temp <- data %>% dplyr::filter(group %in% c("group1", "group4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result


### P-N
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene

group <- c(rep("pnOver8", 4), rep("pnOver6", 4), rep("pnOver4", 4), rep("pnOver2", 4), rep("pnOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, pnOver8),
           countGeneList(geneCluster, pnOver6),
           countGeneList(geneCluster, pnOver4),
           countGeneList(geneCluster, pnOver2),
           countGeneList(geneCluster, pnOver0))

data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

temp <- data %>% dplyr::filter(group %in% c("pnOver2", "pnOver8"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

### P-S
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene

group <- c(rep("psOver4", 4), rep("psOver3", 4), rep("psOver2", 4), rep("psOver1", 4), rep("psOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, psOver4),
           countGeneList(geneCluster, psOver3),
           countGeneList(geneCluster, psOver2),
           countGeneList(geneCluster, psOver1),
           countGeneList(geneCluster, psOver0))

data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()
temp <- data %>% dplyr::filter(group %in% c("psOver2", "psOver4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

### P-E
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene

group <- c(rep("peOver4", 4), rep("peOver3", 4), rep("peOver2", 4), rep("peOver1", 4), rep("peOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, peOver4),
           countGeneList(geneCluster, peOver3),
           countGeneList(geneCluster, peOver2),
           countGeneList(geneCluster, peOver1),
           countGeneList(geneCluster, peOver0))

data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

### P-P
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene

group <- c(rep("ppOver4", 4), rep("ppOver3", 4), rep("ppOver2", 4), rep("ppOver1", 4), rep("ppOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, ppOver4),
           countGeneList(geneCluster, ppOver3),
           countGeneList(geneCluster, ppOver2),
           countGeneList(geneCluster, ppOver1),
           countGeneList(geneCluster, ppOver0))
data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

A485

## Importing groups
group1 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group2.tsv"))$gene
group3 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group3.tsv"))$gene
group4 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group4.tsv"))$gene
group5 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group5.tsv"))$gene
group6 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group6.tsv"))$gene
group7 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group7.tsv"))$gene
group8 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group8.tsv"))$gene
group9 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group9.tsv"))$gene


## Importing Bobbie gene classification
geneCluster <- fread(here(refDir, "bobbie_gene_classification.csv")) %>%
  dplyr::select(gene, enst, Cluster)
colnames(geneCluster) <- c("gene", "ensembl_transcript_id", "cluster")


## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
                 filters = "ensembl_transcript_id",
                 values = geneCluster$ensembl_transcript,
                 mart = ensembl.v102)
geneCluster <- geneCluster %>% dplyr::left_join(idPair_tg, by = c("ensembl_transcript_id"))

# Making data for stacked barplot
countGene <- function(geneCluster, groupName, clusterName){
  num <- nrow(geneCluster %>% dplyr::filter(ensembl_gene_id %in% groupName,
                                            cluster %in% clusterName))
  return(num)
}
countGeneList <- function(geneCluster, groupName){
  n1 <- countGene(geneCluster, groupName, "Early")
  n2 <- countGene(geneCluster, groupName, "Middle")
  n3 <-countGene(geneCluster, groupName, "Late")
  n4 <- countGene(geneCluster, groupName, "Transient")
  return(c(n1, n2, n3, n4))
}

group <- c(rep("group1", 4), rep("group2", 4), rep("group3", 4),
           rep("group4", 4), rep("group5", 4), rep("group6", 4),
           rep("group7", 4), rep("group8", 4), rep("group9", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 9)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, group1),
           countGeneList(geneCluster, group2),
           countGeneList(geneCluster, group3),
           countGeneList(geneCluster, group4),
           countGeneList(geneCluster, group5),
           countGeneList(geneCluster, group6),
           countGeneList(geneCluster, group7),
           countGeneList(geneCluster, group8),
           countGeneList(geneCluster, group9))

data <- data.frame(group, cluster, value)


# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()


# Statistical analysis 
# Similar to the Chi-Square test, Fisher’s Exact Test is used when you have smaller sample sizes or when the expected frequency in any cell of the contingency table is below 5.
temp <- data %>% dplyr::filter(group %in% c("group1", "group2"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

temp <- data %>% dplyr::filter(group %in% c("group1", "group3"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

temp <- data %>% dplyr::filter(group %in% c("group1", "group4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

[2.24] Checking bookmarking

  1. Checking whether subset of regulatory loop have bookmarking
  2. Checking whether loops related to genes have bookmarking H3K27ac_effie ATAC_effie TBP Oct4_effie, Oct4_dsg Sox2_effie, Sox2_dsg Esrrb_dsg Klf4_effie CTCF ##### Functions
extractAnchor <- function(loop){
  anchor1 <- loop %>% dplyr::select(c(1, 2, 3))
  colnames(anchor1) <- c("chr", "start", "end")
  anchor2 <- loop %>% dplyr::select(c(4, 5, 6))
  colnames(anchor2) <- c("chr", "start", "end")
  anchors <- reduce(makeGRangesFromDataFrame(bind_rows(anchor1, anchor2)))
    return(anchors)
}

runFisherExact <- function(interest.gr, background.gr, query.gr){
  overlaps_interest <- countOverlaps(interest.gr, query.gr)
  a <- sum(overlaps_interest > 0)
  total_interest <- length(interest.gr)
  c <- total_interest - a
  
  overlaps_background <- countOverlaps(background.gr, query.gr)
  b <- sum(overlaps_background > 0)
  total_background <- length(background.gr)
  d <- total_background - b
  
  # Construct contingency table
  contingency_table <- matrix(c(a, c, b, d), nrow=2, byrow=TRUE,
                              dimnames=list("Region" = c("interest.gr", "background.gr"),
                                            "Overlap" = c("Yes", "No")))
  
  # Perform Fisher's exact test
  fisher_result <- fisher.test(contingency_table)
  
  return(fisher_result)
}

runFisherExactCombination <- function(interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr, lost.gr){
  # SEEDING
  temp <- runFisherExact(loop.up.gr, background.gr, retained.gr)
  result.tb <- tibble(interest = paste0(interestName, "_UP"),
                      target = paste0(targetName, "_retained"),
                      pvalue = temp$p.value,
                      oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.no.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_NO"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.up.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_UP"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.no.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_NO"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  return(result.tb)
}

runFisherExactCombinationBinary <- function(interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr, lost.gr){
  # SEEDING
  temp <- runFisherExact(loop.upno.gr, background.gr, retained.gr)
  result.tb <- tibble(interest = paste0(interestName, "_UP/NO"),
                      target = paste0(targetName, "_retained"),
                      pvalue = temp$p.value,
                      oddsRatio = temp$estimate)
  

  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.upno.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_UP/NO"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)

  
  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  return(result.tb)
}

runFisherExactCombinationRetained <- function(interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr){
  # SEEDING
  temp <- runFisherExact(loop.up.gr, background.gr, retained.gr)
  result.tb <- tibble(interest = paste0(interestName, "_UP"),
                      target = paste0(targetName, "_retained"),
                      pvalue = temp$p.value,
                      oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.no.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_NO"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  

  return(result.tb)
}

runFisherExactCombinationTarget <- function(targetName, interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr){
  # RETAINED loop
  retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(retained) <- c("chr", "start", "end")
  retained.gr <- makeGRangesFromDataFrame(retained)
  # LOST loop
  lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(lost) <- c("chr", "start", "end")
  lost.gr <- makeGRangesFromDataFrame(lost)
  
  result <- runFisherExactCombination(interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr, lost.gr)
  
  return(result)
}

runFisherExactCombinationTargetBinary <- function(targetName, interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr){
  # RETAINED loop
  retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(retained) <- c("chr", "start", "end")
  retained.gr <- makeGRangesFromDataFrame(retained)
  # LOST loop
  lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(lost) <- c("chr", "start", "end")
  lost.gr <- makeGRangesFromDataFrame(lost)
  
  result <- runFisherExactCombinationBinary(interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr, lost.gr)
  
  return(result)
}

runFisherExactCombinationTargetRetained <- function(targetName, interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr){
  # RETAINED loop
  retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(retained) <- c("chr", "start", "end")
  retained.gr <- makeGRangesFromDataFrame(retained)
  # LOST loop
  result <- runFisherExactCombinationRetained(interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr)
  
  return(result)
}
All loops
#### Importing loops of interest
interestName <- "allLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loopNum <- nrow(loop.no)
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
loop.down <- loop.all %>% dplyr::filter(diff_dTAG_DMSO < -0.2) %>% dplyr::arrange(diff_dTAG_DMSO) %>% slice_head(n = loopNum)
loop.down.gr <- (extractAnchor(loop.down))

#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("ESRRB_NCB", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("ESRRB_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("NANOG_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp11 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp12 <- runFisherExactCombinationTarget("OCT4_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp13 <- runFisherExactCombinationTarget("SMC1", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp14 <- runFisherExactCombinationTarget("SOX2_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp15 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp16 <- runFisherExactCombinationTarget("SOX2_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp17 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10,
         temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 5), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5), 
                                    labels = c("0", "1", "2", "3", "4", "5")))
All loops refined


#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))
Reg loops
#### Importing loops of interest
interestName <- "regLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# UPNO loop
loop.upno <- bind_rows(loop.up, loop.no)
loop.upno.gr <- extractAnchor(loop.upno)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))
Figure - H3K27ac only
targetName <- "H3K27ac_effie"
# RETAINED loop
retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(retained) <- c("chr", "start", "end")
retained.gr <- makeGRangesFromDataFrame(retained)
# LOST loop
lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(lost) <- c("chr", "start", "end")
lost.gr <- makeGRangesFromDataFrame(lost)


# SEEDING
temp <- runFisherExact(loop.upno.gr, background.gr, retained.gr)
result.tb <- tibble(interest = paste0(interestName, "_UP/NO"),
                    target = paste0(targetName, "_retained"),
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)

# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
result.tb <- result.tb %>% 
  add_row(interest = paste0(interestName, "_DOWN"),
          target = paste0(targetName, "_retained"),
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

# ADDING ROWS
temp <- runFisherExact(loop.upno.gr, background.gr, lost.gr)
result.tb <- result.tb %>% 
  add_row(interest = paste0(interestName, "_UP/NO"),
          target = paste0(targetName, "_lost"),
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
result.tb <- result.tb %>% 
  add_row(interest = paste0(interestName, "_DOWN"),
          target = paste0(targetName, "_lost"),
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)


result.tb <- result.tb %>%
  mutate(interest = recode(interest, 
                           "regLoop_UP/NO" = "UP/NO",
                           "regLoop_DOWN" = "DOWN"),
         target = recode(target,
                         "H3K27ac_effie_retained" = "Retained",
                         "H3K27ac_effie_lost" = "Lost"))

result.tb$interest <- factor(result.tb$interest, levels = c("UP/NO", "DOWN"))

p <- ggplot(result.tb, aes(x = interest, y = target, size = -log10(pvalue), fill = oddsRatio)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 0.5*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
  scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"),  # Define gradient colors
                      values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5), 
                      #low = "white", high = "#CB333A",
                      #                  limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) +
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "heatmap_H3K27ac_bookmarking_reg_dotplot")
width <- panelSize(1.7)*mmToInch
height <- panelSize(1.1)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"),  height = height, width = width)
print(p)
dev.off()

  
Figure - all targets
#### Importing loops of target

temp1 <- runFisherExactCombinationTargetBinary("ESRRB_dsg", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTargetBinary("H3K27ac_effie", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTargetBinary("KLF4_effie", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTargetBinary("OCT4_effie", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTargetBinary("SOX2_effie", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)


data <- bind_rows(temp1, temp2, temp3, temp4, temp5)



result.tb <- data %>%
  mutate(interest = recode(interest, 
                           "regLoop_UP/NO" = "UP/NO",
                           "regLoop_DOWN" = "DOWN"),
         target = recode(target,
                         "H3K27ac_effie_retained" = "H3K27ac_retained",
                         "H3K27ac_effie_lost" = "H3K27ac_lost",
                         "ESRRB_dsg_retained" = "ESRRB_retained",
                         "ESRRB_dsg_lost" = "ESRRB_lost",
                         "KLF4_effie_retained" = "KLF4_retained",
                         "KLF4_effie_lost" = "KLF4_lost",
                         "OCT4_effie_retained" = "OCT4_retained",
                         "OCT4_effie_lost" = "OCT4_lost",
                         "SOX2_effie_retained" = "SOX2_retained",
                         "SOX2_effie_lost" = "SOX2_lost"))

result.tb$interest <- factor(result.tb$interest, levels = c("UP/NO", "DOWN"))
result.tb$target <- factor(result.tb$target, levels = rev(c("H3K27ac_retained", "H3K27ac_lost",
                                                        "ESRRB_retained", "ESRRB_lost",
                                                        "KLF4_retained", "KLF4_lost",
                                                        "OCT4_retained", "OCT4_lost",
                                                        "SOX2_retained","SOX2_lost")))

p <- ggplot(result.tb, aes(x = interest, y = target, size = -log10(pvalue), fill = oddsRatio)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
  scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"),  # Define gradient colors
                      values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5), 
                      #low = "white", high = "#CB333A",
                      #                  limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) +
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "heatmap_alltarget_bookmarking_reg_dotplot")
width <- panelSize(2.2)*mmToInch
height <- panelSize(2)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"),  height = height, width = width)
print(p)
dev.off()
Retained & Lost
#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("ESRRB_NCB", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("ESRRB_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("NANOG_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp11 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp12 <- runFisherExactCombinationTarget("OCT4_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp13 <- runFisherExactCombinationTarget("SMC1", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp14 <- runFisherExactCombinationTarget("SOX2_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp15 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp16 <- runFisherExactCombinationTarget("SOX2_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp17 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10,
         temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 5), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5), 
                                    labels = c("0", "1", "2", "3", "4", "5")))
Retained

#### Importing loops of target
temp1 <- runFisherExactCombinationTargetRetained("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTargetRetained("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTargetRetained("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTargetRetained("ESRRB_NCB", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTargetRetained("ESRRB_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTargetRetained("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTargetRetained("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTargetRetained("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTargetRetained("NANOG_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTargetRetained("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp11 <- runFisherExactCombinationTargetRetained("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp12 <- runFisherExactCombinationTargetRetained("OCT4_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp13 <- runFisherExactCombinationTargetRetained("SMC1", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp14 <- runFisherExactCombinationTargetRetained("SOX2_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp15 <- runFisherExactCombinationTargetRetained("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp16 <- runFisherExactCombinationTargetRetained("SOX2_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp17 <- runFisherExactCombinationTargetRetained("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10,
         temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))

Reg refined

Retained & Lost


#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))
Retained


#### Importing loops of target
temp1 <- runFisherExactCombinationTargetRetained("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTargetRetained("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTargetRetained("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTargetRetained("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTargetRetained("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTargetRetained("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTargetRetained("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTargetRetained("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTargetRetained("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTargetRetained("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))

[2.24.2] Checking bookmarking - A485

All loops

#### Importing loops of interest
interestName <- "allLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))
All loops refined


#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))

Reg loops

#### Importing loops of interest
interestName <- "regLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))

fwrite(as_tibble(loop.up.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
fwrite(as_tibble(loop.no.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
fwrite(as_tibble(loop.down.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
Retained & Lost


#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))

DEGs from RNA-seq

2.5kb from TSS, at least 1 bp overlap #### 2i

gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))


temp <- diff.RNA %>% dplyr::mutate(start = TSS - 2500,
                           end = TSS + 2500) %>%
  dplyr::select(chr, start, end, ensembl_gene_id, diff)


down.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN"))
up.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "UP"))
no.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "NO"))
all.gr <- makeGRangesFromDataFrame(temp)

data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
                                      up.gr, no.gr, down.gr,
                                      all.gr)


# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 2), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2), 
                                    labels = c("0", "1", "2")))


## GO TEST
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "DOWN"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)

downStemGene <- unique(unlist((as.data.frame(GO) %>% dplyr::slice(c(4, 5, 14, 66)) %>% dplyr::mutate(geneID = strsplit(geneID, "/")))$geneID))
downStem.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(ensembl_gene_id %in% downStemGene))

print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))


downNONStem.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN",
                                                               !(ensembl_gene_id %in% downStemGene)))

print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))



### Checking bookmarking only in stem gene

data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
                                      down.gr, downNONStem.gr, downStem.gr,
                                      all.gr)


# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 2), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2), 
                                    labels = c("0", "1", "2")))

Epi

gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.Epi.A485_vs_G1.Epi.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))


temp <- diff.RNA %>% dplyr::mutate(start = TSS - 2500,
                           end = TSS + 2500) %>%
  dplyr::select(chr, start, end, ensembl_gene_id, diff)


down.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN"))
up.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "UP"))
no.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "NO"))
all.gr <- makeGRangesFromDataFrame(temp)

data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
                                      up.gr, no.gr, down.gr,
                                      all.gr)


# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 2), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2), 
                                    labels = c("0", "1", "2")))


## GO
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.Epi.A485_vs_G1.Epi.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))

geneList <- (diff.RNA %>% dplyr::filter(diff == "DOWN"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))

[2.26] Checking the ChIP intensity at anchors

This doesn’t have to be restricted to P-N loops. Let’s check all loops first. Also instead of density (mean), median could be better choice. Sum is not appropriate here since the size of anchors are not same. #### Limited to peaks to reduce noise ##### dTAG

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(loop ==group1) )$sumScore
  distance2 <- (data %>% dplyr::filter(loop ==group2) )$sumScore
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

name <- "chromo_cons_annoHierarchy"

# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# UP NO
loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))

getSumScores <- function(track, anchor) {
  # Find overlaps between all anchors and track regions at once
  overlaps <- findOverlaps(anchor, track)
  
  # Extract the scores and corresponding anchor indices
  anchor_indices <- queryHits(overlaps)
  track_scores <- score(track)[subjectHits(overlaps)]
  
  # Use tapply to calculate the median scores for each anchor
  median_scores <- tapply(track_scores, anchor_indices, mean, na.rm = TRUE)
  
  # Initialize a numeric vector to store the median scores for each anchor
  all_median_scores <- rep(NA, length(anchor))
  
  # Populate the median scores for the anchors that have overlaps
  all_median_scores[as.numeric(names(median_scores))] <- median_scores
  
  return(all_median_scores)
}

plotSumScores <- function(track, peak, name){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  a <- getSumScores(peakTrack, anchor.up)
  b <- getSumScores(peakTrack, anchor.no)
  c <- getSumScores(peakTrack, anchor.down)
  a.tb <- tibble(loop = "UP",
                 sumScore = a)
  b.tb <- tibble(loop = "NO",
                 sumScore = b)
  c.tb <- tibble(loop = "DOWN",
                 sumScore = c)
  data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
  ggplot(data, aes(x = loop, y = sumScore)) + 
    labs(x = NULL, y = paste0(name, " average peak score per anchor")) +
    geom_violin(aes(fill = group), color = "black",
                linewidth = lineThick * mmToLineUnit, lineend = "square",
                show.legend = FALSE) +
    geom_boxplot(width = 0.1, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.size = 1, outlier.stroke = NA) + theme_classic() +
    stat_summary(
      aes(group = loop), fun = mean,
      geom = "point", shape = 21, size = 1,
      fill = "red", color = "black"
    ) +
    coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.95))) + theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )
}

plotSumScoresBinary <- function(track, peak, name, anchor.upno, anchor.down){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  b <- getSumScores(peakTrack, anchor.upno)
  c <- getSumScores(peakTrack, anchor.down)
  b.tb <- tibble(loop = "UP/NO",
                 sumScore = b)
  c.tb <- tibble(loop = "DOWN",
                 sumScore = c)
  data <- bind_rows(b.tb, c.tb) %>% drop_na()
  data$loop <- factor(data$loop, levels = c("UP/NO", "DOWN"))
  
  p12 <- getPvalWilcox(data, "UP/NO", "DOWN")
  p <-  ggplot(data, aes(x = name, fill = loop, y = sumScore)) + 
    labs(x = NULL, y = paste0("Average ChIP peak score at anchor)")) +
    introdataviz::geom_split_violin(linewidth = lineThick * mmToLineUnit, lineend = "square",
                                    alpha = .4) +
    geom_boxplot(width = 0.3, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + theme_classic() +
    stat_summary(
      aes(group = loop), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) + theme(
      axis.title = element_text(
        size = fontSizeM, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeM, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 0,
      ),
      axis.text.y = element_text(
        size = fontSizeS,
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)
    ) + 
    coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.9))) + 
    annotate(
     "text", x = 1, y = quantile(data$sumScore, 0.5),
     label = paste0("p12: ", convPvalue(p12)),
     color = "black", hjust = 0, size = 3
   ) +   guides(
    fill = guide_legend(
        keywidth = 0.2,  # Adjust the width of the legend keys
        keyheight = 0.2  # Adjust the height of the legend keys
    )
  )
  
   fileName <- paste0("ChIP_peak_avgPeakScore_", name)
  width <- panelSize(1.5)*mmToInch
  height <- panelSize(1.2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "H3K4me3")
plotSumScoresBinary(track, peak, "H3K4me3", anchor.upno, anchor.down)

track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
#plotSumScores(track, peak, "H3K27ac")
plotSumScoresBinary(track, peak, "H3K27ac", anchor.upno, anchor.down)

track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
#plotSumScores(track, peak, "H3K14ac")
plotSumScoresBinary(track, peak, "H3K14ac", anchor.upno, anchor.down)

track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "CTCF")
plotSumScoresBinary(track, peak, "CTCF", anchor.upno, anchor.down)

track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "RAD21")
plotSumScoresBinary(track, peak, "RAD21", anchor.upno, anchor.down)
A485
name <- "chromo_cons_annoHierarchy"

# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))

getSumScores <- function(track, anchor) {
  # Find overlaps between all anchors and track regions at once
  overlaps <- findOverlaps(anchor, track)
  
  # Extract the scores and corresponding anchor indices
  anchor_indices <- queryHits(overlaps)
  track_scores <- score(track)[subjectHits(overlaps)]
  
  # Use tapply to calculate the median scores for each anchor
  median_scores <- tapply(track_scores, anchor_indices, sum, na.rm = TRUE)
  
  # Initialize a numeric vector to store the median scores for each anchor
  all_median_scores <- rep(NA, length(anchor))
  
  # Populate the median scores for the anchors that have overlaps
  all_median_scores[as.numeric(names(median_scores))] <- median_scores
  
  return(all_median_scores)
}

plotSumScores <- function(track, peak, name){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  a <- getSumScores(peakTrack, anchor.up)
  b <- getSumScores(peakTrack, anchor.no)
  c <- getSumScores(peakTrack, anchor.down)
  a.tb <- tibble(loop = "UP",
                 sumScore = a)
  b.tb <- tibble(loop = "NO",
                 sumScore = b)
  c.tb <- tibble(loop = "DOWN",
                 sumScore = c)
  data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
  ggplot(data, aes(x = loop, y = sumScore)) + geom_boxplot(outlier.shape = NA) + theme_classic() + ggtitle(paste0(name, " sum peak")) +
    coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))
}

# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
plotSumScores(track, peak, "H3K4me3")

track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
plotSumScores(track, peak, "H3K27ac")

track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
plotSumScores(track, peak, "H3K14ac")

track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
plotSumScores(track, peak, "CTCF")

track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak.temp <- fread(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed")) %>%
  dplyr::mutate(V2 = V2 - 1000,
                V3 = V3 + 1000)
colnames(peak.temp) <- c("chr", "start", "end")
peak <- makeGRangesFromDataFrame(peak.temp)
plotSumScores(track, peak, "YY1")


### For those with only summit
track <- import(here(refDir, "GSM5571895_ESC_YY1_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM5571895_ESC_YY1_summit.bed"))
plotSumScores(track, peak, "RAD21")

### FOR THOSE WITHOUT PEAKS
track <- import(here(refDir, "GSM2082708_ESC.H3K27me3.1_mm10Lifted.black.bw"), format = "BigWig")
a <- getSumScores(track, anchor.up)
b <- getSumScores(track, anchor.no)
c <- getSumScores(track, anchor.down)
a.tb <- tibble(loop = "UP",
               sumScore = a)
b.tb <- tibble(loop = "NO",
               sumScore = b)
c.tb <- tibble(loop = "DOWN",
               sumScore = c)
data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
ggplot(data, aes(x = loop, y = sumScore)) + geom_boxplot(outlier.shape = NA) + theme_classic() + ggtitle(paste0("H3K27me3 sum no peak")) +
  coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))

Peak density instead of intensity

name <- "chromo_cons_annoHierarchy"

# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))


peak.H3K4me3 <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
peak.temp <- fread(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed")) %>%
  dplyr::mutate(V2 = V2 - 1000,
                V3 = V3 + 1000)
colnames(peak.temp) <- c("chr", "start", "end")
peak.RAD21 <- makeGRangesFromDataFrame(peak.temp)


calculatePeakDensity <- function(peak, note){
  
  overlap_counts <- countOverlaps(anchor.up, peak)
  anchor_widths <- (width(anchor.up)-1)/1000
  density <- overlap_counts / anchor_widths
  data1 <- tibble(type = "UP",
                  densityPerKb = density)
  
  overlap_counts <- countOverlaps(anchor.no, peak)
  anchor_widths <- (width(anchor.no)-1)/1000
  density <- overlap_counts / anchor_widths
  mcols(anchor.no)$density <- density
  data2 <- tibble(type = "NO",
                  densityPerKb = density)
  
  overlap_counts <- countOverlaps(anchor.down, peak)
  anchor_widths <- (width(anchor.down)-1)/1000
  density <- overlap_counts / anchor_widths
  mcols(anchor.down)$density <- density
  data3 <- tibble(type = "DOWN",
                  densityPerKb = density)
  
  data <- bind_rows(data1, data2, data3)
  
  ggplot(data, aes(x = type, y = densityPerKb))  + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1 ) + theme_bw() + ggtitle(note) +
    coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))
}

calculatePeakDensity(peak.H3K4me3, "H3K4me3")
calculatePeakDensity(peak.H3K27ac, "H3K27ac")
calculatePeakDensity(peak.CTCF, "CTCF")
calculatePeakDensity(peak.RAD21, "RAD21")

[2.27] Overlap with SE?

Q1. How many SEs are covered with Micro-C loops?

name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
anchor <- extractAnchor(loop)

### Whyte
n <- length(unique(subjectHits(findOverlaps(anchor, peak.Whyte.SE))))
total <- length(peak.Whyte.SE)
counts <- c(n, total - n)
labels <- paste(c("covered", "NOT covered"), counts)
pie(counts, labels = labels,  main = paste0("Whyte SE coverage \n", total), col = c("grey", "white"))

fileName <- here(figDir, "pie_SE_Whyte")
width <- panelSize(4)*mmToInch
height <- panelSize(4)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# pie(counts, labels = labels,  main = paste0("Whyte SE coverage \n", total), col = c("grey", "white"),
#     cex = 1, cex.main = 1)
# dev.off()



### Dylan
n <- length(unique(subjectHits(findOverlaps(anchor, peak.Dylan.SE))))
total <- length(peak.Dylan.SE)
counts <- c(n, total - n)
labels <- paste(c("covered", "NOT covered"), counts)
pie(counts, labels = labels, main = paste0("Dylan SE coverage \n", total), col = c("grey", "white"))

fileName <- here(figDir, "pie_SE_Murphy")
width <- panelSize(4)*mmToInch
height <- panelSize(4)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# pie(counts, labels = labels, main = paste0("Murphy SE coverage \n", total), col = c("grey", "white"))
# 
# dev.off()

Q2. Fisher’s exact test?

dTAG
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))

getOverlapLoopNum <- function(loop, peak){
  anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
  anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
  a <- queryHits(findOverlaps(anchor1, peak))
  b <- queryHits(findOverlaps(anchor2, peak))
  return(length(unique(c(a, b))))
}

getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
  all.overlap <- getOverlapLoopNum(allLoop, peak)
  all.notOverlap <- nrow(allLoop) - all.overlap
  
  subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
  subset.notOverlap <- nrow(subsetLoop) - subset.overlap
  
  contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
                                all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
  colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
  rownames(contingency_table) <- c("All loops", "Subset loops")
  
  # Perform Fisher's Exact Test
  fisher_test_result <- fisher.test(contingency_table)
  return(fisher_test_result)
}



loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.upno <- bind_rows(loop.up, loop.no)
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))

### Dylan
# Seeding
temp <- getSEOverlapFisher(loop, loop.upno, peak.Dylan.SE)
result.tb <- tibble(loopType = "UP/NO",
                    target = "Dylan SE",
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)

# temp <- getSEOverlapFisher(loop, loop.up, peak.Dylan.SE)
# result.tb <- tibble(loopType = "UP",
#                     target = "Dylan SE",
#                     pvalue = temp$p.value,
#                     oddsRatio = temp$estimate)
# Add row
# temp <- getSEOverlapFisher(loop, loop.no, peak.Dylan.SE)
# result.tb <- result.tb %>% 
#   add_row(loopType = "NO",
#           target = "Dylan SE",
#           pvalue = temp$p.value,
#           oddsRatio = temp$estimate)

temp <- getSEOverlapFisher(loop, loop.down, peak.Dylan.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target = "Dylan SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

### Whyte
# Seeding
# temp <- getSEOverlapFisher(loop, loop.up, peak.Whyte.SE)
# result.tb <- result.tb %>% 
#   add_row(loopType = "UP",
#                     target = "Whyte SE",
#                     pvalue = temp$p.value,
#                     oddsRatio = temp$estimate)

# Add row
temp <- getSEOverlapFisher(loop, loop.upno, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "UP/NO",
          target = "Whyte SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

temp <- getSEOverlapFisher(loop, loop.down, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target = "Whyte SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)


# Visualization
library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
  pivot_wider(names_from = loopType, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 2), 
                      c("#4852A0", "white", "#CB333A"))

p <- Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = fontSizeS, fontfamily = fontType))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        row_names_gp = gpar(fontsize = fontSizeS, fontfamily = fontType),
  column_names_gp = gpar(fontsize = fontSizeS, fontfamily = fontType),
        heatmap_legend_param = list(
          at = c(0, 1, 2),
          labels = c("0", "1", "2"),
          title_gp = gpar(fontfamily = fontType, fontsize = fontSizeS),
          labels_gp = gpar(fontfamily = fontType, fontsize = fontSizeS)
        )
)

# fileName <- here(figDir, "heatmap_SE_enrichment")
# width <- panelSize(1.5)*mmToInch
# height <- panelSize(0.7)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# print(p)
# dev.off()
# png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
# print(p)
# dev.off()

#######################

data$loopType <- factor(data$loopType, levels = c("UP/NO", "DOWN"))
data$target <- factor(data$target, levels = c("Whyte SE", "Dylan SE"))

p <- ggplot(data, aes(x = loopType, y = target, size = -log10(pvalue), fill = oddsRatio)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
  scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"),  # Define gradient colors
                      values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5), 
                      #low = "white", high = "#CB333A",
                      #                  limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) +
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "heatmap_SE_enrichment_dotplot")
width <- panelSize(1.8)*mmToInch
height <- panelSize(1.1)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"),  height = height, width = width)
print(p)
dev.off()
A485
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))

getOverlapLoopNum <- function(loop, peak){
  anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
  anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
  a <- queryHits(findOverlaps(anchor1, peak))
  b <- queryHits(findOverlaps(anchor2, peak))
  return(length(unique(c(a, b))))
}

getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
  all.overlap <- getOverlapLoopNum(allLoop, peak)
  all.notOverlap <- nrow(allLoop) - all.overlap
  
  subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
  subset.notOverlap <- nrow(subsetLoop) - subset.overlap
  
  contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
                                all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
  colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
  rownames(contingency_table) <- c("All loops", "Subset loops")
  
  # Perform Fisher's Exact Test
  fisher_test_result <- fisher.test(contingency_table)
  return(fisher_test_result)
}



loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))

### Dylan
# Seeding
temp <- getSEOverlapFisher(loop, loop.up, peak.Dylan.SE)
result.tb <- tibble(loopType = "UP",
                    target = "Dylan SE",
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)
# Add row
temp <- getSEOverlapFisher(loop, loop.no, peak.Dylan.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "NO",
          target = "Dylan SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

temp <- getSEOverlapFisher(loop, loop.down, peak.Dylan.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target = "Dylan SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

### Whyte
# Seeding
temp <- getSEOverlapFisher(loop, loop.up, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "UP",
                    target = "Whyte SE",
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)
# Add row
temp <- getSEOverlapFisher(loop, loop.no, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "NO",
          target = "Whyte SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

temp <- getSEOverlapFisher(loop, loop.down, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target = "Whyte SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)


# Visualization
library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
  pivot_wider(names_from = loopType, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 5), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5), 
                                    labels = c("0", "1", "2", "3", "4", "5")))

[2.28] Comparing differential loops in dTAG to A485

The question I want to ask here is whether perturbed loops in dTAG experiments are either perturbed or not perturbed in A485 experiment. If there is compensation going on between RAD21 and A485, UP loop in RAD21 should be more DOWN in A485 and vice versa #### Exploratory part

diffCutoff <- 0.2
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))

 temp <- data
  temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  
  p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() + 
    geom_hline(yintercept = diffCutoff, alpha = 1, color = "black") +
    geom_hline(yintercept = -diffCutoff, alpha = 1, color = "black") +
    geom_vline(xintercept = diffCutoff, alpha = 1, color = "black") +
    geom_vline(xintercept = -diffCutoff, alpha = 1, color = "black") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) + 
    theme_classic()
  
  fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_dTAG-", diffName)
  png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
  print(p1)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 3.5, height = 3)
  print(p1)
  dev.off()

Scatterplot & box plot

name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))

makeAcrossSampleScatterplotdTAG <- function(data, AnnoList, diff, name, loopName, diffName){
  temp <- data %>% dplyr::filter(Anno2 %in% AnnoList,
                                 updown_dTAG_DMSO %in% diff)
  temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
  temp <- temp %>% dplyr::arrange(density)

  p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() + 
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) + 
    theme_classic() + ggtitle(paste0(name, "_", loopName, "_dTAG-", diffName)) + theme(plot.title = element_text(size = 5))
  
  fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_dTAG-", diffName)
  png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
  print(p1)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 3.5, height = 3)
  print(p1)
  dev.off()
}

makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("UP", "NO", "DOWN"), name, "all", "all")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("UP"), name, "all", "UP")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("NO"), name, "all", "NO")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("DOWN"), name, "all", "DOWN")

makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("UP", "NO", "DOWN"), name, "pe-pe", "all")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("UP"), name, "pe-pe", "UP")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("NO"), name, "pe-pe", "NO")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("DOWN"), name, "pe-pe", "DOWN")

makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("UP", "NO", "DOWN"), name, "str", "all")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("UP"), name, "str", "UP")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("NO"), name, "str", "NO")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("DOWN"), name, "str", "DOWN")

makeAcrossSampleScatterplotA485 <- function(data, AnnoList, diff, name, loopName, diffName){
  temp <- data %>% dplyr::filter(Anno2 %in% AnnoList,
                                 updown_A485_DMSO %in% diff)
  temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
  temp <- temp %>% dplyr::arrange(density)

  p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() + 
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) + 
    theme_classic() + ggtitle(paste0(name, "_", loopName, "_A485G-", diffName)) + theme(plot.title = element_text(size = 5))
  
  fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_A485-", diffName)
  png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
  print(p1)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 3.5, height = 3)
  print(p1)
  dev.off()
}

makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("UP", "NO", "DOWN"), name, "all", "all")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("UP"), name, "all", "UP")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("NO"), name, "all", "NO")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("DOWN"), name, "all", "DOWN")

makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("UP", "NO", "DOWN"), name, "pe-pe", "all")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("UP"), name, "pe-pe", "UP")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("NO"), name, "pe-pe", "NO")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("DOWN"), name, "pe-pe", "DOWN")

makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("UP", "NO", "DOWN"), name, "str", "all")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("UP"), name, "str", "UP")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("NO"), name, "str", "NO")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("DOWN"), name, "str", "DOWN")

######## Barplot
temp <- data %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO)
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))

temp <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO) 
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))


temp <- data %>% dplyr::filter(Anno2 %in% c("S-S", "S-X")) %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO) 
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))

###
temp <- data %>% dplyr::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))

temp <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>% dplyr:::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))


temp <- data %>% dplyr::filter(Anno2 %in% c("S-S", "S-X")) %>% dplyr:::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))

[2.29] Compartment analysis

Q. What are the changes in compartment level upon A485 treatment? #### Compartment change distribution

### IMPORTING COMPARMTNET SCORES
compDir <- here("../..", "result", "compartment", "CscoreTools")

cscore.DMSO <- as_tibble(fread(here(compDir, "G1DMSO_Merged_10kb_cscore_final.bedgraph"), skip = 1)) %>%
  dplyr::mutate(V2 = V2 + 1)
colnames(cscore.DMSO) <- c("chr", "start", "end", "cscore_DMSO")

cscore.A485 <- as_tibble(fread(here(compDir, "G1A485_Merged_10kb_cscore_final.bedgraph"), skip = 1)) %>%
  dplyr::mutate(V2 = V2 + 1)
colnames(cscore.A485) <- c("chr", "start", "end", "cscore_A485")


cscore <- dplyr::full_join(cscore.DMSO, cscore.A485, by = c("chr", "start", "end"))

# FILTER ROWS WITH NA
cscore <- cscore %>% filter(!if_any(everything(), is.na))

# Annotating how the compartment changed
threshold <- 0.1
cscore <- cscore %>% rowwise() %>% dplyr::mutate(isAB_DMSO = ifelse(cscore_DMSO > 0, "A", "B"),
                                                 isAB_A485 = ifelse(cscore_A485 > 0, "A", "B"),
                                                 changeType = case_when(
                                                   abs(cscore_DMSO - cscore_A485) < threshold ~ "Unchanged",
                                                   isAB_DMSO == "A" & isAB_A485 == "B" ~ "AtoB",
                                                   isAB_DMSO == "B" & isAB_A485 == "A" ~ "BtoA",
                                                   isAB_DMSO == "A" & isAB_A485 == "A" & abs(cscore_DMSO) > abs(cscore_A485) ~ "A_weakening",
                                                   isAB_DMSO == "A" & isAB_A485 == "A" & abs(cscore_DMSO) <= abs(cscore_A485) ~ "A_strengthening",
                                                   isAB_DMSO == "B" & isAB_A485 == "B" & abs(cscore_DMSO) > abs(cscore_A485) ~ "B_weakening",
                                                   isAB_DMSO == "B" & isAB_A485 == "B" & abs(cscore_DMSO) <= abs(cscore_A485) ~ "B_strengthening"                                                 ))
cscore_summary <- tibble(comparison = rep("A485_vs_DMSO", 7),
                         changeType = c("AtoB", "BtoA", "A_weakening", "A_strengthening", "B_weakening", "B_strengthening", "Unchanged"),
                         value = c(sum(cscore$changeType == "AtoB"),
                                   sum(cscore$changeType == "BtoA"),
                                   sum(cscore$changeType == "A_weakening"),
                                   sum(cscore$changeType == "A_strengthening"),
                                   sum(cscore$changeType == "B_weakening"),
                                   sum(cscore$changeType == "B_strengthening"),
                                   sum(cscore$changeType == "Unchanged")))
cscore_summary$changeType <- factor(cscore_summary$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
                                                                          "B_weakening", "BtoA", "A_strengthening",
                                                                          "Unchanged"))
ggplot(cscore_summary, aes(x = comparison, y = value, fill = changeType)) + geom_bar(position = "stack", stat = "identity") +
  theme_bw() +
  scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))
  

Combining with RNA-seq

gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))

alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))


getCompChangeType <- function(chrom, TSS, cscore.tb){
  temp.tb <- cscore.tb %>% dplyr::filter(chr == chrom, start < TSS, end > TSS)
  out <- temp.tb$changeType
  if(length(out) < 1){
    return(NA)
  }else{
    return(out)
  }
}

cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
temp <-diff.RNA %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
  dplyr::select(chr, start, end)
diff.RNA.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(diff.RNA.gr, cscore.gr)

#cscore.selected <- cscore %>% dplyr::slice(subjectHits(overlap))
## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(diff.RNA[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


missed <- diff.RNA[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(TSS = TSS + 10000) %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
  dplyr::select(chr, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols(missed[queryHits(overlap)],
                    data.table(cscore)[subjectHits(overlap)])

missed2 <- missed[-queryHits(overlap)]
temp <- missed2 %>% dplyr::mutate(TSS = TSS - 10000) %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
  dplyr::select(chr, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch3 <- bind_cols(missed2[queryHits(overlap)],
                    data.table(cscore)[subjectHits(overlap)])

#missed3 <- missed2[-queryHits(overlap)]


diff.RNA <- bind_rows(batch1, batch2, batch3)


# VISUALIZE
diff.RNA$changeType <- factor(diff.RNA$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
                                                                          "B_weakening", "BtoA", "A_strengthening",
                                                                          "Unchanged"))
ggplot(diff.RNA, aes(x = diff, fill = changeType)) +
  geom_bar(position = "fill") +
  labs(title = "Stacked Bar Plot of Change Type by Diff",
       x = "Diff",
       y = "Count") +
  theme_bw() +  scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))

Combining with loop

name <- "chromo_cons_annoHierarchy"

# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
anchor.up.tb <- as_tibble(anchor.up) %>%
  dplyr::mutate(center = (start + end)/2) %>%
  dplyr::mutate(start = center-1, end = center+1) %>%
  dplyr::select(seqnames, start, end)
anchor.up <- makeGRangesFromDataFrame(anchor.up.tb)
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
anchor.no.tb <- as_tibble(anchor.no) %>%
  dplyr::mutate(center = (start + end)/2) %>%
  dplyr::mutate(start = center-1, end = center+1) %>%
  dplyr::select(seqnames, start, end)
anchor.no <- makeGRangesFromDataFrame(anchor.no.tb)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
anchor.down.tb <- as_tibble(anchor.down) %>%
  dplyr::mutate(center = (start + end)/2) %>%
  dplyr::mutate(start = center-1, end = center+1) %>%
  dplyr::select(seqnames, start, end)
anchor.down <- makeGRangesFromDataFrame(anchor.down.tb)


########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.up, cscore.gr)

## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.up.tb)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


missed <- data.table(anchor.up.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
  dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


data.up <- bind_rows(batch1, batch2)

########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.no, cscore.gr)

## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.no.tb)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


missed <- data.table(anchor.no.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
  dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


data.no <- bind_rows(batch1, batch2)

########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.down, cscore.gr)

## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.down.tb)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


missed <- data.table(anchor.down.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
  dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


data.down <- bind_rows(batch1, batch2)

n.up <- nrow(data.up)
n.no <- nrow(data.no)
n.down <- nrow(data.down)

data <- tibble(loopDiff = c(rep("UP", n.up), rep("NO", n.no), rep("DOWN", n.down)),
              changeType = c(data.up$changeType, data.no$changeType, data.down$changeType))

# VISUALIZE
data$changeType <- factor(data$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
                                                                          "B_weakening", "BtoA", "A_strengthening",
                                                                          "Unchanged"))
ggplot(data, aes(x = loopDiff, fill = changeType)) +
  geom_bar(position = "fill") +
  labs(title = "Stacked Bar Plot of Change Type by Diff",
       x = "Diff",
       y = "Count") +
  theme_bw() +  scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))

[2.30] ChromHMM

resultDir <- here("../../result")

data <- fread(here(resultDir, "chromHMM", "A485_pe-pe_anchors", "overlap_enrich_100_state.txt"))
colnames(data) <- c("state", "genome", "pe-pe_A485_down", "pe-pe_A485_no", "pe-pe_A485_up")
data <- data %>% dplyr::select(-genome)

data_matrix <- data %>%
  column_to_rownames(var = "state") %>%
  as.matrix()

library(circlize)

col_fun <- colorRamp2(c(0, 1, 10), 
                      c("blue", "white", "red"))

Heatmap(
  data_matrix,
  name = "Value",
  show_row_names = TRUE,
  show_column_names = TRUE,
  cluster_columns = FALSE,
  cluster_rows = FALSE,
   col = col_fun
)

#######
data2 <- data %>% dplyr::select(c(1, 2, 4))
data2 <- data2 %>% dplyr::filter(`pe-pe_A485_down` > `pe-pe_A485_up`)
data_matrix <- data2 %>%
  column_to_rownames(var = "state") %>%
  as.matrix()

library(circlize)

col_fun <- colorRamp2(c(0, 1, 10), 
                      c("blue", "white", "red"))

Heatmap(
  data_matrix,
  name = "Value",
  show_row_names = TRUE,
  show_column_names = TRUE,
  cluster_columns = FALSE,
  cluster_rows = FALSE,
   col = col_fun
)

[2.31] Making Visualization Easy

https://bioconductor.org/books/devel/OHCA/pages/visualization.html

### Getting loop ID
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
  dplyr::mutate(res = V3 - V2,
                id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id

temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
  dplyr::mutate(res = V3 - V2,
                id = paste(V1, res, V2, V5, sep = "_"))
strID <- temp$id

############## REG
## Filtering loops to plot
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- data %>% dplyr::filter(id %in% regID, dTAG > 0.5) %>% dplyr::arrange(desc(dTAG))
temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)

# Visualization
hicDir <- "/Volumes/UKJIN_SSD/data_vault_2024summer_microC/hic"
windowSize <- 1*1e6

i = 1
# for(i in seq(1, 25)){
  ## Loading hic and plotting
  chr <- as_tibble(loops[i])$seqnames1
  center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
  start <- floor(center - 0.5*windowSize)
  end <- floor(center + 0.5*windowSize)
  
  cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
  cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))
  #cf.G1dTAG <- HicFile(path = here(hicDir, "G1.A485.Merged.hic"))

  res <- 10*1000
  zmax <- 2.5
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_regLoop_dTAG_", i, "_10kb_figureVer"))
  
  width <- panelSize(20)*mmToInch
  height <- panelSize(10)*mmToInch
  
  png(paste0(fileName, ".png"), res = 600, units = "in", width = width, height = height)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  svglite(paste0(fileName, ".svg"), width = width, height = height)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  
  res <- 25*1000
  zmax <- 3
  start <- floor(center - windowSize)
  end <- floor(center + windowSize)
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_regLoop_dTAG_", i, "_25kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  # svglite(paste0(fileName, ".svg"), width = 10, height = 5)
  # print(cowplot::plot_grid(p1, p2, align = "h"))
  # dev.off()
# }

############## Str
## Filtering loops to plot
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- data %>% dplyr::filter(id %in% strID, dTAG > 0.5) %>% dplyr::arrange(desc(dTAG))
temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)

# Visualization
hicDir <- "/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/data_vault_2024summer_microC/hic"
windowSize <- 2*1e6

for(i in seq(1, 25)){
  ## Loading hic and plotting
  chr <- as_tibble(loops[i])$seqnames1
  center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
  start <- floor(center - 0.5*windowSize)
  end <- floor(center + 0.5*windowSize)
  
  cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
  cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))

  res <- 10*1000
  zmax <- 2.5
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_strLoop_dTAG_", i, "_10kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  
  
  res <- 25*1000
  zmax <- 3
  start <- floor(center - windowSize)
  end <- floor(center + windowSize)
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_strLoop_dTAG_", i, "_25kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
}

############## dTAG called loops
## Filtering loops to plot
data1 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_25kb.tsv"))
data2 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_10kb.tsv"))
data3 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_5kb.tsv"))
data <- bind_rows(data1, data2, data3)

temp <- data %>% dplyr::arrange(desc(score))

temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)

# Visualization
hicDir <- "/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/data_vault_2024summer_microC/hic"
windowSize <- 2*1e6

for(i in seq(1, 25)){
  ## Loading hic and plotting
  chr <- as_tibble(loops[i])$seqnames1
  center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
  start <- floor(center - 0.5*windowSize)
  end <- floor(center + 0.5*windowSize)
  
  cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
  cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))

  res <- 10*1000
  zmax <- 2.5
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_dTAGcalledLoop_dTAG_", i, "_10kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  
  
  res <- 25*1000
  zmax <- 3
  start <- floor(center - windowSize)
  end <- floor(center + windowSize)
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_dTAGcalledLoop_dTAG_", i, "_25kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
}

chr = "chr13"
start = 96900000
end = 98100000
  res <- 10*1000
  zmax <- 2.5
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax))
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax))
    print(cowplot::plot_grid(p1, p2, align = "h"))

[2.32] Heatmap of loops

Strategy: check overlap of the union loops to the loops called at each condition #### Check sample specific called loops

library(circlize)

name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
cons.loop <- importBedpe(temp)

# Checking DMSO
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 5, "kb.bedpe"))))


overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)

index <- sort(unique(c(queryHits(overlap.25kb),
         queryHits(overlap.10kb),
         queryHits(overlap.5kb))))
data$calledByDMSO <- 0
data$calledByDMSO[index] <- 1

# Checking dTAG
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 5, "kb.bedpe"))))


overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)

index <- sort(unique(c(queryHits(overlap.25kb),
         queryHits(overlap.10kb),
         queryHits(overlap.5kb))))
data$calledBydTAG <- 0
data$calledBydTAG[index] <- 1

# Checking A485
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 5, "kb.bedpe"))))


overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)

index <- sort(unique(c(queryHits(overlap.25kb),
         queryHits(overlap.10kb),
         queryHits(overlap.5kb))))
data$calledByA485 <- 0
data$calledByA485[index] <- 1


# Heatmap (checking whether certain peak is called by specific sample)
dataToPlot <- data %>% dplyr::select(id, calledByDMSO, calledBydTAG, calledByA485)
dataToPlot <- dataToPlot %>% dplyr::mutate(flag = 4*calledByDMSO + 2*calledBydTAG + calledByA485) %>%
  dplyr::arrange(desc(flag))

row_groups <- dataToPlot$flag
data_matrix <- dataToPlot %>% column_to_rownames(var = "id") %>% dplyr::select(-flag) %>% as.matrix()
col_fun <- colorRamp2(c(0, 1), 
                      c("white", "green"))
h1 <- Heatmap(data_matrix, name = "calledBy",
        cluster_columns = FALSE,
        cluster_rows = FALSE,
        show_row_dend = FALSE,
        border = TRUE,
        show_row_names = FALSE,
        col = col_fun,
        row_split = row_groups)


# Prepare data for the second heatmap
dataToPlot2 <- data %>% dplyr::select(id, DMSO, dTAG, A485) %>%
  arrange(match(id, dataToPlot$id))

data_matrix2 <- dataToPlot2 %>% column_to_rownames(var = "id") %>% as.matrix()


col_fun2 <- colorRamp2(c(-0.5, 0, 1), c("blue", "white", "red"))

h2 <- Heatmap(data_matrix2, name = "score",
              cluster_columns = FALSE,
              cluster_rows = TRUE,
              show_row_dend = FALSE,
              border = TRUE,
              show_row_names = FALSE,
              col = col_fun2,
              row_split = row_groups)


fileName <- here(figDir, paste0("heatmap_checkingSampleCalledLoops"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 3, height = 10)
print(h1 + h2)
dev.off()

Checking updown

All loops
# Heatmap
library(circlize)

# Heatmap 2
set.seed(123)

diffCutoff <- 0.2
dataToPlot <- data %>% dplyr::mutate(isdiff_dTAG_DMSO = 
                                       case_when(diff_dTAG_DMSO >= diffCutoff ~ 1,
                                                 abs(diff_dTAG_DMSO) < diffCutoff ~ 0,
                                                 diff_dTAG_DMSO <= -diffCutoff ~ -1,
                                                 TRUE ~ NA),
                                     isdiff_A485_DMSO = 
                                       case_when(diff_A485_DMSO >= diffCutoff ~ 1,
                                                 abs(diff_A485_DMSO) < diffCutoff ~ 0,
                                                 diff_A485_DMSO <= -diffCutoff ~ -1,
                                                 TRUE ~ NA)) %>%
  dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
        cluster_columns = FALSE,
        cluster_rows = FALSE,
        row_km = 9,
        show_row_dend = FALSE,
        border = FALSE,
        show_row_names = FALSE,
        col = col_fun)

fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()

set.seed(123)

hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)

loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
loop.cluster5 <- temp[row_clusters[[5]]]
loop.cluster6 <- temp[row_clusters[[6]]]
loop.cluster7 <- temp[row_clusters[[7]]]
loop.cluster8 <- temp[row_clusters[[8]]]
loop.cluster9 <- temp[row_clusters[[9]]]
Reg loops
# Heatmap
library(circlize)

# Heatmap 2
set.seed(123)

diffCutoff <- 0.2
dataToPlot <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
  dplyr::mutate(isdiff_dTAG_DMSO = 
                  case_when(diff_dTAG_DMSO >= diffCutoff ~ 1,
                            abs(diff_dTAG_DMSO) < diffCutoff ~ 0,
                            diff_dTAG_DMSO <= -diffCutoff ~ -1,
                            TRUE ~ NA),
                isdiff_A485_DMSO = 
                  case_when(diff_A485_DMSO >= diffCutoff ~ 1,
                            abs(diff_A485_DMSO) < diffCutoff ~ 0,
                            diff_A485_DMSO <= -diffCutoff ~ -1,
                            TRUE ~ NA)) %>%
  dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
              cluster_columns = FALSE,
              cluster_rows = FALSE,
        row_km = 8,
        show_row_dend = FALSE,
        border = FALSE,
        show_row_names = FALSE,
        col = col_fun)

fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_reg"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()

set.seed(123)

hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)

loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
loop.cluster5 <- temp[row_clusters[[5]]]
loop.cluster6 <- temp[row_clusters[[6]]]
loop.cluster7 <- temp[row_clusters[[7]]]
loop.cluster8 <- temp[row_clusters[[8]]]
library(circlize)

set.seed(123)
dataToPlot <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
  dplyr::select(id, diff_dTAG_DMSO, diff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "white", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
              cluster_columns = FALSE,
              cluster_rows = FALSE,
        row_km = 4,
        show_row_dend = FALSE,
        border = TRUE,
        show_row_names = FALSE,
        col = col_fun)


fileName <- here(figDir, paste0("heatmap_diffScore_reg_k4"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 6)
print(h1)
dev.off()

hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)

loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
All loops _OE
# Heatmap
library(circlize)

name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Heatmap 2
set.seed(123)

# Import obs/exp scores and merge to the dataset
minValue <- -4
diffCutoff <- 0.5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))


# Merge dataset
dataToPlot <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
  dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO,
                isdiff_dTAG_DMSO = case_when(log_obsexp_diff_dTAG_DMSO >= diffCutoff ~ 1,
                                             abs(log_obsexp_diff_dTAG_DMSO) < diffCutoff ~ 0,
                                             log_obsexp_diff_dTAG_DMSO <= diffCutoff ~ -1,
                                             TRUE ~ NA ),
                isdiff_A485_DMSO = case_when(log_obsexp_diff_A485_DMSO >= diffCutoff ~ 1,
                                             abs(log_obsexp_diff_A485_DMSO) < diffCutoff ~ 0,
                                             log_obsexp_diff_A485_DMSO <= diffCutoff ~ -1,
                                             TRUE ~ NA))%>%
  dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)



data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
        cluster_columns = FALSE,
        cluster_rows = FALSE,
        row_km = 9,
        show_row_dend = FALSE,
        border = FALSE,
        show_row_names = FALSE,
        col = col_fun)

fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_OE"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()
# 
# set.seed(123)
# 
# hm_drawn <- draw(h1)
# row_clusters <- row_order(hm_drawn)
# 
# loop.cluster1 <- temp[row_clusters[[1]]]
# loop.cluster2 <- temp[row_clusters[[2]]]
# loop.cluster3 <- temp[row_clusters[[3]]]
# loop.cluster4 <- temp[row_clusters[[4]]]
# loop.cluster5 <- temp[row_clusters[[5]]]
# loop.cluster6 <- temp[row_clusters[[6]]]
# loop.cluster7 <- temp[row_clusters[[7]]]
# loop.cluster8 <- temp[row_clusters[[8]]]
# loop.cluster9 <- temp[row_clusters[[9]]]
Reg loops _OE
# Heatmap
library(circlize)

name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Heatmap 2
set.seed(123)

# Import obs/exp scores and merge to the dataset
minValue <- -4
diffCutoff <- 0.5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))


# Merge dataset
dataToPlot <- data %>% dplyr::left_join(obsexp, by = c("id")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
  dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO,
                isdiff_dTAG_DMSO = case_when(log_obsexp_diff_dTAG_DMSO >= diffCutoff ~ 1,
                                             abs(log_obsexp_diff_dTAG_DMSO) < diffCutoff ~ 0,
                                             log_obsexp_diff_dTAG_DMSO <= diffCutoff ~ -1,
                                             TRUE ~ NA ),
                isdiff_A485_DMSO = case_when(log_obsexp_diff_A485_DMSO >= diffCutoff ~ 1,
                                             abs(log_obsexp_diff_A485_DMSO) < diffCutoff ~ 0,
                                             log_obsexp_diff_A485_DMSO <= diffCutoff ~ -1,
                                             TRUE ~ NA))%>%
  dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)



data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
        cluster_columns = FALSE,
        cluster_rows = FALSE,
        row_km = 9,
        show_row_dend = FALSE,
        border = FALSE,
        show_row_names = FALSE,
        col = col_fun)

fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_reg_OE"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()
# 
# set.seed(123)
# 
# hm_drawn <- draw(h1)
# row_clusters <- row_order(hm_drawn)
# 
# loop.cluster1 <- temp[row_clusters[[1]]]
# loop.cluster2 <- temp[row_clusters[[2]]]
# loop.cluster3 <- temp[row_clusters[[3]]]
# loop.cluster4 <- temp[row_clusters[[4]]]
# loop.cluster5 <- temp[row_clusters[[5]]]
# loop.cluster6 <- temp[row_clusters[[6]]]
# loop.cluster7 <- temp[row_clusters[[7]]]
# loop.cluster8 <- temp[row_clusters[[8]]]
# loop.cluster9 <- temp[row_clusters[[9]]]

[2.32] Comparing with Bobbie’s data

Where does differentiated related genes fall into?

marginError <- function(myList) {
  sample.n = length(myList)
  sample.sd = sd(myList)
  sample.se = sample.sd/sqrt(sample.n)
  alpha = 0.05
  degrees.freedom = sample.n - 1
  t.score = qt(p = alpha/2, df = degrees.freedom, lower.tail = F)
  margin.error = t.score*sample.se
  return(margin.error)
}

PROseq.bobbie <- as_tibble(fread(here(refDir, "bobbie_gene_classification.csv")))

geneList.Epi.dTAG.up <- (fread(here(refDir, "diff_G1.dTAG_G1.Epi.dTAG_vs_G1.Epi.DMSO.tsv")) %>% dplyr::filter(padj < 0.05, log2FoldChange > 0))$ensembl_gene_id

geneList.Epi.dTAG.down <- (fread(here(refDir, "diff_G1.dTAG_G1.Epi.dTAG_vs_G1.Epi.DMSO.tsv")) %>% dplyr::filter(padj < 0.05, log2FoldChange < 0))$ensembl_gene_id

## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
                 filters = "ensembl_transcript_id",
                 values = PROseq.bobbie$enst,
                 mart = ensembl.v102)
PROseq.bobbie <- PROseq.bobbie %>% dplyr::left_join(idPair_tg, by = c("enst" = "ensembl_transcript_id"))

## Bar Plot
temp <- PROseq.bobbie %>% dplyr::filter(ensembl_gene_id %in% geneList.Epi.dTAG.down)

group <- c(rep("EpiLC_dTAGvsDMSO_DOWN", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 1)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(sum(temp$Cluster == "Early"),
           sum(temp$Cluster == "Middle"),
           sum(temp$Cluster == "Late"),
           sum(temp$Cluster == "Transient"))
data <- data.frame(group, cluster, value)
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

## Line plot
temp <- temp %>% dplyr::select(c(4, 5, 6, 7, 8, 9))
temp.tall = temp %>% pivot_longer(-c(1, 2), names_to = "timepoints", values_to = "value")

gg1 = ggplot(temp.tall,
       aes( x = factor(timepoints, level = c("MIT", "EG1", "LG1", "ASYN")),
            y = value,
            group = enst, col = enst)) +
  geom_line() +
  theme_classic() +
  ggtitle("Bobbie, PRO-seq", subtitle = "selected genes") +
  xlab ("timepoints") + ylab("value") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "none",
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

ggsave(filename = here(figDir, "bobbie_gene_EpiLC_dTAGvsDMSO_DOWN.png"), gg1, width = 4, height = 4, dpi = 300, units = "in", device = "png")


# DRAW RIBBON


temp.ribbon = tibble("timepoints" = c("MIT", "EG1", "LG1", "ASYN"),
                      "value" = c(mean(temp$MIT), mean(temp$EG1), mean(temp$LG1), mean(temp$ASYN)),
                      "lower" = c(mean(temp$MIT) - marginError(temp$MIT), 
                                  mean(temp$EG1) - marginError(temp$EG1), 
                                  mean(temp$LG1) - marginError(temp$LG1), 
                                  mean(temp$ASYN) - marginError(temp$ASYN)),
                      "upper" = c(mean(temp$MIT) + marginError(temp$MIT), 
                                  mean(temp$EG1) + marginError(temp$EG1), 
                                  mean(temp$LG1) + marginError(temp$LG1), 
                                  mean(temp$ASYN) + marginError(temp$ASYN)))
gg1 = ggplot(temp.ribbon, aes(x = factor(timepoints, level = c("MIT", "EG1", "LG1", "ASYN")), 
                        y = value, group = 1)) +
  geom_line(color = "black") +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.3) +
  theme_classic() +
  ggtitle("Bobbie, PRO-seq", subtitle = "selected genes") +
  xlab ("timepoints") + ylab("value") + ylim(0, 65) +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "none",
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

ggsave(filename = here(figDir, "bobbie_gene_EpiLC_dTAGvsDMSO_DOWN_95CI.png"), gg1, width = 4, height = 4, dpi = 300, units = "in", device = "png")

###[2.33] Finding closest enhancer Here, enhancer will be defined by H3K27ac peak It makes more sence to include the enhancer peak on gene body (strategy 1) exclude +-10kb region from TSS only (strategy 2) only check the E-P loops called in Micro-C ##### binary group - excluding all gene body

gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS)

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.tb <- gene.tb %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
  ensembl = gene.tb$ensembl
)

genebody.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>% dplyr::select(V1, V2, V3)
colnames(genebody.tb) <- c("chr", "start", "end")
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)


peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K27ac<- makeGRangesFromDataFrame(fread(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed")) %>% 
                                          dplyr::mutate(chr = V1, start = (V2 + V3)/2, end = (V2 + V3)/2) %>%
                                          dplyr::select(chr, start, end))

####### Filtering out peaks overlapping with the gene body
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)

# Indices of peaks that overlap the TSS
overlapping_peak_indices <- unique(subjectHits(overlaps))

# Exclude overlapping peaks
non_overlapping_peaks <- peak.H3K27ac[-overlapping_peak_indices]


####### Calculating distance to nearest peak
nearest_peak_indices <- nearest(genes.gr, non_overlapping_peaks)
nearest_peaks <- non_overlapping_peaks[nearest_peak_indices]
distances <- distance(genes.gr, nearest_peaks)


results <- data.frame(
  ensembl = mcols(genes.gr)$ensembl,
  gene_chr = as.character(seqnames(genes.gr)),
  gene_TSS = start(genes.gr),
  peak_chr = as.character(seqnames(nearest_peaks)),
  peak_start = start(nearest_peaks),
  peak_end = end(nearest_peaks),
  distance = distances
)

results <- results %>% dplyr::mutate(group = case_when(ensembl %in% group1 ~ "group1",
                                            ensembl %in% group2 ~ "group2",
                                            TRUE ~ NA))

ggplot(results, aes(x = group, y = distance, fill = group)) + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw() + ggtitle("distance to nearest H3K27ac peak excluding gene body") + scale_y_continuous(labels = label_kb_mb) + coord_cartesian(ylim = c(0, 250*1000))

  
binary group - excluding specific gene body
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS) %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
  ensembl = gene.tb$ensembl
)

genebody.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>% dplyr::filter(V6 %in% c(group1, group2))%>% dplyr::select(V1, V2, V3)
colnames(genebody.tb) <- c("chr", "start", "end")
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)


# Step 1: Find overlaps between gene bodies and peaks
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)

# Create a list mapping each gene to the indices of peaks overlapping with its gene body
overlapping_peaks_per_gene <- split(subjectHits(overlaps), queryHits(overlaps))

# Initialize an empty list to store results
results_list <- vector("list", length(genes.gr))

# Step 2: For each gene, exclude overlapping peaks and find the nearest peak to its TSS
for (i in seq_along(genes.gr)) {
  gene <- genes.gr[i]
  
  # Get indices of peaks overlapping with this gene's body
  overlapping_peak_indices <- overlapping_peaks_per_gene[[as.character(i)]]
  
  # Exclude overlapping peaks for this gene
  if (!is.null(overlapping_peak_indices)) {
    peaks_to_consider <- peak.H3K27ac[-overlapping_peak_indices]
  } else {
    peaks_to_consider <- peak.H3K27ac
  }
  
  # Find the nearest peak to the TSS of this gene
  nearest_peak_index <- nearest(gene, peaks_to_consider)
  
  if (is.na(nearest_peak_index)) {
    # No peaks found; set NA values
    results_list[[i]] <- data.frame(
      ensembl = mcols(gene)$ensembl,
      gene_chr = as.character(seqnames(gene)),
      gene_TSS = start(gene),
      peak_chr = NA,
      peak_start = NA,
      peak_end = NA,
      distance = NA
    )
  } else {
    nearest_peak <- peaks_to_consider[nearest_peak_index]
    dist <- distance(gene, nearest_peak)
    
    results_list[[i]] <- data.frame(
      ensembl = mcols(gene)$ensembl,
      gene_chr = as.character(seqnames(gene)),
      gene_TSS = start(gene),
      peak_chr = as.character(seqnames(nearest_peak)),
      peak_start = start(nearest_peak),
      peak_end = end(nearest_peak),
      distance = dist
    )
  }
}

# Combine results into a single data frame
results <- do.call(rbind, results_list)

# Add group information
results <- results %>% dplyr::mutate(group = case_when(
  ensembl %in% group1 ~ "group1",
  ensembl %in% group2 ~ "group2",
  TRUE ~ NA_character_
))

# Plotting
ggplot(results, aes(x = group, y = distance, fill = group)) +
  geom_violin() +
  geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
  theme_bw() +
  ggtitle("Distance to Nearest H3K27ac Peak Excluding Gene Body Overlaps") +
  scale_y_continuous(labels = label_kb_mb) +
  coord_cartesian(ylim = c(0, 50 * 1000))
binary group - excluding all TSS +- 10kb
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS)

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.tb <- gene.tb %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
  ensembl = gene.tb$ensembl
)

genebody.tb <- gene.tb %>% dplyr::mutate(start = TSS-10*1000, end = TSS + 10*1000) %>%
  dplyr::select(chr, start, end)

genebody.gr <- makeGRangesFromDataFrame(genebody.tb)


peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
# peak.H3K27ac<- makeGRangesFromDataFrame(fread(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed")) %>% 
#                                           dplyr::mutate(chr = V1, start = (V2 + V3)/2, end = (V2 + V3)/2) %>%
#                                           dplyr::select(chr, start, end))

####### Filtering out peaks overlapping with the gene body
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)

# Indices of peaks that overlap the TSS
overlapping_peak_indices <- unique(subjectHits(overlaps))

# Exclude overlapping peaks
non_overlapping_peaks <- peak.H3K27ac[-overlapping_peak_indices]


####### Calculating distance to nearest peak
nearest_peak_indices <- nearest(genes.gr, non_overlapping_peaks)
nearest_peaks <- non_overlapping_peaks[nearest_peak_indices]
distances <- distance(genes.gr, nearest_peaks)


results <- data.frame(
  ensembl = mcols(genes.gr)$ensembl,
  gene_chr = as.character(seqnames(genes.gr)),
  gene_TSS = start(genes.gr),
  peak_chr = as.character(seqnames(nearest_peaks)),
  peak_start = start(nearest_peaks),
  peak_end = end(nearest_peaks),
  distance = distances
)

results <- results %>% dplyr::mutate(group = case_when(ensembl %in% group1 ~ "group1",
                                            ensembl %in% group2 ~ "group2",
                                            TRUE ~ NA))

ggplot(results, aes(x = group, y = distance, fill = group)) + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw() + ggtitle("distance to nearest H3K27ac peak excluding gene body") + scale_y_continuous(labels = label_kb_mb) + coord_cartesian(ylim = c(0, 250*1000))

  
binary group - excluding specific TSS +- 10 kb
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS) %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
  ensembl = gene.tb$ensembl
)

genebody.tb <- gene.tb %>% dplyr::mutate(start = TSS-10*1000, end = TSS + 10*1000) %>%
  dplyr::select(chr, start, end)
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)

peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))


# Step 1: Find overlaps between gene bodies and peaks
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)

# Create a list mapping each gene to the indices of peaks overlapping with its gene body
overlapping_peaks_per_gene <- split(subjectHits(overlaps), queryHits(overlaps))

# Initialize an empty list to store results
results_list <- vector("list", length(genes.gr))

# Step 2: For each gene, exclude overlapping peaks and find the nearest peak to its TSS
for (i in seq_along(genes.gr)) {
  gene <- genes.gr[i]
  
  # Get indices of peaks overlapping with this gene's body
  overlapping_peak_indices <- overlapping_peaks_per_gene[[as.character(i)]]
  
  # Exclude overlapping peaks for this gene
  if (!is.null(overlapping_peak_indices)) {
    peaks_to_consider <- peak.H3K27ac[-overlapping_peak_indices]
  } else {
    peaks_to_consider <- peak.H3K27ac
  }
  
  # Find the nearest peak to the TSS of this gene
  nearest_peak_index <- nearest(gene, peaks_to_consider)
  
  if (is.na(nearest_peak_index)) {
    # No peaks found; set NA values
    results_list[[i]] <- data.frame(
      ensembl = mcols(gene)$ensembl,
      gene_chr = as.character(seqnames(gene)),
      gene_TSS = start(gene),
      peak_chr = NA,
      peak_start = NA,
      peak_end = NA,
      distance = NA
    )
  } else {
    nearest_peak <- peaks_to_consider[nearest_peak_index]
    dist <- distance(gene, nearest_peak)
    
    results_list[[i]] <- data.frame(
      ensembl = mcols(gene)$ensembl,
      gene_chr = as.character(seqnames(gene)),
      gene_TSS = start(gene),
      peak_chr = as.character(seqnames(nearest_peak)),
      peak_start = start(nearest_peak),
      peak_end = end(nearest_peak),
      distance = dist
    )
  }
}

# Combine results into a single data frame
results <- do.call(rbind, results_list)

# Add group information
results <- results %>% dplyr::mutate(group = case_when(
  ensembl %in% group1 ~ "group1",
  ensembl %in% group2 ~ "group2",
  TRUE ~ NA_character_
))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$distance
  distance2 <- (data %>% dplyr::filter(group == group2) )$distance
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

getPvalWilcox(results, "group1", "group2")
# Plotting
ggplot(results, aes(x = group, y = distance, fill = group)) +
  geom_violin() +
  geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
  theme_bw() +
  ggtitle("Distance to Nearest H3K27ac Peak Excluding Gene Body Overlaps") +
  scale_y_continuous(labels = label_kb_mb) +
  coord_cartesian(ylim = c(0, 50 * 1000)) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") 
Only limiting to E-P loops from Micro-C

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene



name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
                                 diffCutoff = 0.2) %>%
  dplyr::filter(Anno2 == "P-E")

temp <- geneAnnoData %>% unnest(gene)

gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS)

temp <- dplyr::left_join(temp, gene.tb, by = c("gene" = "ensembl"))

temp <- temp %>% dplyr::mutate(center1 = (start1 + end1)/2,
                               center2 = (start2 + end2)/2,
                               distance1 = abs(TSS-center1),
                               distance2 = abs(TSS-center2)) %>%
  dplyr::mutate(distance = pmax(distance1, distance2))

temp <- temp %>% group_by(gene) %>% summarize(min_enh_distance = min(distance)) %>%
  dplyr::mutate(group = case_when(gene %in% group1 ~ "group1",
                                  gene %in% group2 ~ "group2",
                                  TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(temp, aes(x = group, y = min_enh_distance, fill = group)) + 
  geom_violin() +
  geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
  theme_bw() +
  ggtitle("Distance to Nearest Enhancer fro E-P") +
  scale_y_continuous(labels = label_kb_mb)  +
  coord_cartesian(ylim = c(0, 1000 * 1000))

[2.24] (WORKING ON)Checking the percentage of non-important loops

Idea: having one non-perturbed reg loops could be enough.

name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                 diffCutoff = 0.2)


loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))

geneAnnoData <- geneAnnoData %>% dplyr::filter(id %in% c(loop.up$id, loop.no$id, loop.down$id)) %>%
  dplyr::mutate(loopType = case_when(id %in% c(loop.up$id, loop.no$id) ~ "insensitive",
                                     id %in% c(loop.down$id) ~ "sensitive",
                                     TRUE ~ NA))

data <- geneAnnoData %>% dplyr::select(gene, id, loopType) %>% unnest(gene) %>%
  group_by(gene) %>% summarize(
    insensitive = sum(loopType == "insensitive", na.rm = TRUE),
    sensitive = sum(loopType == "sensitive", na.rm = TRUE))



###

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

geneGroup <- tibble(group = c(rep("group1", length(group1)), rep("group2", length(group2))),
                    gene = c(group1, group2))

data <- left_join(geneGroup, data, by = c("gene")) %>% dplyr::mutate(mutate(across(everything(), ~replace_na(., 0))))


###
data <- data %>% dplyr::mutate(presenceOfRetained = if_else(insensitive > 0, "YES", "NO"),
                       percOfRetained = if_else(insensitive + sensitive == 0, 0, 100*insensitive/(insensitive + sensitive)))


###
ggplot(data, aes(x = group, y = percOfRetained, fill = group)) + geom_violin() + 
  geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw()


### Stacked barplot
group <- c("group1", "group1", "group2", "group2")
presenceOfRetained <- rep(c("YES", "NO"), 2)
presenceOfRetained <- factor (presenceOfRetained, levels = c("YES", "NO"))
value <- c(nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "YES")),
           nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "NO")),
           nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "YES")),
           nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "NO")))
plotData <- data.frame(group, presenceOfRetained, value)

ggplot(plotData, aes(fill=presenceOfRetained, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
                                 diffCutoff = 0.2)


loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))

geneAnnoData <- geneAnnoData %>% dplyr::filter(id %in% c(loop.up$id, loop.no$id, loop.down$id)) %>%
  dplyr::mutate(loopType = case_when(id %in% c(loop.up$id, loop.no$id) ~ "insensitive",
                                     id %in% c(loop.down$id) ~ "sensitive",
                                     TRUE ~ NA))

data <- geneAnnoData %>% dplyr::select(gene, id, loopType) %>% unnest(gene) %>%
  group_by(gene) %>% summarize(
    insensitive = sum(loopType == "insensitive", na.rm = TRUE),
    sensitive = sum(loopType == "sensitive", na.rm = TRUE))



###

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

geneGroup <- tibble(group = c(rep("group1", length(group1)), rep("group2", length(group2))),
                    gene = c(group1, group2))

data <- left_join(geneGroup, data, by = c("gene")) %>% dplyr::mutate(mutate(across(everything(), ~replace_na(., 0))))


###
data <- data %>% dplyr::mutate(presenceOfRetained = if_else(insensitive > 0, "YES", "NO"),
                       percOfRetained = if_else(insensitive + sensitive == 0, 0, 100*insensitive/(insensitive + sensitive)))


###
ggplot(data, aes(x = group, y = percOfRetained, fill = group)) + geom_violin() + 
  geom_boxplot(outlier.shape = NA, width = 0.05, fill = "white") + theme_bw()


### Stacked barplot
group <- c("group1", "group1", "group2", "group2")
presenceOfRetained <- rep(c("YES", "NO"), 2)
presenceOfRetained <- factor (presenceOfRetained, levels = c("YES", "NO"))
value <- c(nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "YES")),
           nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "NO")),
           nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "YES")),
           nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "NO")))
plotData <- data.frame(group, presenceOfRetained, value)

ggplot(plotData, aes(fill=presenceOfRetained, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

[2.25] Obs/Exp loops

Merging obs/exp scores from different resolution & samples

G1
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))

sample <- "G1.DMSO.Merged"
for(sample in c("G1.DMSO.Merged", "G1.dTAG.Merged", "G1.A485.Merged")){
  temp.25kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_25kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.25kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp.10kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_10kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.10kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp.5kb <-fread(here(consensusDir, paste0("loopScore_", sample, "_5kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.5kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp <- bind_rows(temp.25kb, temp.10kb, temp.5kb)
  fwrite(temp, here(consensusDir, paste0("loopScore_", sample, ".tsv")), sep = "\t")
}


temp.DMSO <- fread(here(consensusDir, "loopScore_G1.DMSO.Merged.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.DMSO) <- c("id", "obs_DMSO", "obsexp_DMSO")
temp.dTAG <- fread(here(consensusDir, "loopScore_G1.dTAG.Merged.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.dTAG) <- c("id", "obs_dTAG", "obsexp_dTAG")
temp.A485 <- fread(here(consensusDir, "loopScore_G1.A485.Merged.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.A485) <- c("id", "obs_A485", "obsexp_A485")

loops_oe <- full_join(full_join(temp.DMSO, temp.dTAG, by = "id"),
          temp.A485, by = "id") %>%
  dplyr::mutate(oeFC_dTAG_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_dTAG/obsexp_DMSO),
                oeFC_A485_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_A485/obsexp_DMSO))

fwrite(loops_oe, here(consensusDir, paste0("loopScore_cons_obsexp.tsv")), sep = "\t")
Async
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))

for(sample in c("Async.UT", "Async.AID")){
  temp.25kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_25kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.25kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp.10kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_10kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.10kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp.5kb <-fread(here(consensusDir, paste0("loopScore_", sample, "_5kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.5kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp <- bind_rows(temp.25kb, temp.10kb, temp.5kb)
  fwrite(temp, here(consensusDir, paste0("loopScore_", sample, ".tsv")), sep = "\t")
}


temp.UT <- fread(here(consensusDir, "loopScore_Async.UT.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.UT) <- c("id", "obs_UT", "obsexp_UT")
temp.AID <- fread(here(consensusDir, "loopScore_Async.AID.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.AID) <- c("id", "obs_AID", "obsexp_AID")

loops_oe <- full_join(temp.UT, temp.AID, by = "id") %>%
  dplyr::mutate(oeFC_AID_UT = if_else(obsexp_UT == 0, NA, obsexp_AID/obsexp_UT))

fwrite(loops_oe, here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv")), sep = "\t")

Merging obs/exp per resolution

Only the loops that made to consensus loops are considered ##### G1

loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))

res <- 25

for (res in c(25, 10, 5)){
  
  temp.DMSO <- fread(here(consensusDir, paste0("loopScore_G1.DMSO.Merged_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.DMSO) <- c("id", "obs_DMSO", "obsexp_DMSO")
  
  temp.dTAG <- fread(here(consensusDir, paste0("loopScore_G1.dTAG.Merged_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.dTAG) <- c("id", "obs_dTAG", "obsexp_dTAG")
  
  temp.A485 <- fread(here(consensusDir, paste0("loopScore_G1.A485.Merged_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.A485) <- c("id", "obs_A485", "obsexp_A485")
  
  temp <- full_join(full_join(temp.DMSO, temp.dTAG, by = c("id")), temp.A485, by = c("id")) %>% 
    dplyr::mutate(oeFC_dTAG_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_dTAG/obsexp_DMSO),
                  oeFC_A485_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_A485/obsexp_DMSO))
  
  fwrite(temp, here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv")), sep = "\t")
}
Async
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))

res <- 25

for (res in c(25, 10, 5)){
  
  temp.UT <- fread(here(consensusDir, paste0("loopScore_Async.UT_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.UT) <- c("id", "obs_UT", "obsexp_UT")
  
  temp.AID <- fread(here(consensusDir, paste0("loopScore_Async.AID_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.AID) <- c("id", "obs_AID", "obsexp_AID")

  
  temp <- full_join(temp.UT, temp.AID, by = c("id")) %>% 
    dplyr::mutate(oeFC_AID_UT = if_else(obsexp_UT == 0, NA, obsexp_AID/obsexp_UT))
  
  fwrite(temp, here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb_async.tsv")), sep = "\t")
}

Comparing obs/exp from chrommosight loop scores

dTAG per res
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

for(res in c(25, 10, 5)){
  minValue <- -4
  maxValue <- 5
  obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
  
  
  # dTAG
  obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
  obsexp <- obsexp %>% dplyr::arrange(density)
  g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  
  # Visualize UP DOWN loops
  temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
  
  
  temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
  
  temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
  
  
  fileName <- paste0("obsexp_dTAG_vs_DMSO_", res, "kb")
  height <- 4
  width <- 12
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(plot_grid(g1, g4, g3, g2, ncol = 4))
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(plot_grid(g1, g4, g3, g2, ncol = 4))
  dev.off()
}
dTAG all res
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

minValue <- -4
maxValue <- 5

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))



# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_dTAG_vs_DMSO")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_dTAG_vs_DMSO_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
Plotting only subset

specificLoop.25kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_25kb.bedpe")) %>%
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
specificLoop.10kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_10kb.bedpe")) %>%
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
specificLoop.5kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_5kb.bedpe")) %>%
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

specificLoops.DMSO <- c(specificLoop.25kb$id, specificLoop.10kb$id, specificLoop.5kb$id)


minValue <- -4
maxValue <- 5

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))




# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id,
                                 id %in% specificLoops.DMSO)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id,
                                 id %in% specificLoops.DMSO)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_dTAG_vs_DMSO_str_vs_reg_DMSOspecificLoops")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
Plotting 1mb
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
maxValue <- 5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>% dplyr::mutate(distance = start2 - start1)


# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

# distance filter
obsexp <- data %>% dplyr::filter(distance > 1e6)


# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_dTAG_vs_DMSO_1mbover")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_dTAG_vs_DMSO_1mbover_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
Plotting Obs
minValue <- -4
maxValue <- 10
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                  log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
                  log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
                  log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))

# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))



# dTAG
obsexp$density <- get_density(obsexp$log_obs_DMSO, obsexp$log_obs_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_dTAG_vs_DMSO_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_dTAG_vs_DMSO_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
Async.AID all res
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

minValue <- -8
maxValue <- 7

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv"))) %>%
  dplyr::mutate(log_obsexp_UT = if_else(obsexp_UT == 0, minValue, log2(obsexp_UT)),
                log_obsexp_AID = if_else(obsexp_AID == 0, minValue, log2(obsexp_AID)))



# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_UT, obsexp$log_obsexp_AID, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_AID_vs_UT")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_AID_vs_UT_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
Plotting Obs

minValue <- -20
maxValue <- 10

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv"))) %>%
  dplyr::mutate(log_obsexp_UT = if_else(obsexp_UT == 0, minValue, log2(obsexp_UT)),
                log_obsexp_AID = if_else(obsexp_AID == 0, minValue, log2(obsexp_AID)),
                log_obs_UT = if_else(obs_UT == 0, minValue, log2(obs_UT)),
                log_obs_AID = if_else(obs_AID == 0, minValue, log2(obs_AID)))

# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))



# dTAG
obsexp$density <- get_density(obsexp$log_obs_UT, obsexp$log_obs_AID, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obs_AID_vs_UT_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obs_AID_vs_UT_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
A485 per res
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

for(res in c(25, 10, 5)){
  minValue <- -4
  maxValue <- 5
  obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
  
  
  # A485
  obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_A485, n = 100)
  obsexp <- obsexp %>% dplyr::arrange(density)
  g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  
  # Visualize UP DOWN loops
  temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  
  temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  
  fileName <- paste0("obsexp_A485_vs_DMSO_", res, "kb")
  height <- 4
  width <- 12
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(plot_grid(g1, g4, g3, g2, ncol = 4))
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(plot_grid(g1, g4, g3, g2, ncol = 4))
  dev.off()
}
A485 all res
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

minValue <- -4
maxValue <- 5

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))


# A485
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_A485, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_A485_vs_DMSO")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_A485_vs_DMSO_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
Plotting Obs
minValue <- -4
maxValue <- 10
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                  log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
                  log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
                  log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))

# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))



# dTAG
obsexp$density <- get_density(obsexp$log_obs_DMSO, obsexp$log_obs_A485, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_A485_vs_DMSO_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_A485_vs_DMSO_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()

[2.26] Repeating few analysis with obs/exp

Function

create_dist_vs_avgScore_perTreatment_oe <- function(data, figDir, name, loopList, colorList, se = FALSE){
  avg_scores_long <- data %>%
    group_by(distance, Anno2) %>%
    summarise(avg_score = mean(score, na.rm = TRUE)) %>%
    ungroup() 
  avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) + 
    geom_smooth(show.legend = TRUE, se = se)  +
    #ylim(0, 0.5) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = colorList) +
    scale_fill_manual(values = colorList) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Score") +
    theme(plot.title = element_text(size = 8))
  
  fileName <- paste0("dist_vs_score_linePlot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 4, height = 3)
  print(p4)
  dev.off()
} 

create_dist_vs_avgDiffScore_perTreatment_oe <- function(data, figDir, name, loopList, colorList, se = FALSE){
  avg_scores_long <- data %>%
    group_by(distance, Anno2) %>%
    summarise(avg_score = mean(score, na.rm = TRUE)) %>%
    ungroup() 
  avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) + 
    geom_hline(yintercept = 0) +
    geom_smooth(show.legend = TRUE, se = se)  +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = colorList) +
    scale_fill_manual(values = colorList) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Diff Score") +
    theme(plot.title = element_text(size = 8)) 
  
  fileName <- paste0("dist_vs_score_difflinePlot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 4, height = 3)
  print(p4)
  dev.off()
} 

create_dist_barplot_oe <- function(data, figDir, name, note, loopList, diffCutoff, distanceFilter = 2*e6){
    data <- data %>% 
    dplyr::mutate(distance = start2 - start1,
                  updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% loopList,
                  distance < distanceFilter)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  
  temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
    dplyr::filter(Anno2 %in% loopList,
                  updown_dTAG_DMSO %in% c("UP", "NO", "DOWN"))
  p <- ggplot(temp, aes(x = updown_dTAG_DMSO, y = distance)) +
    geom_violin(aes(fill = updown_dTAG_DMSO)) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = updown_dTAG_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + ggtitle(note) +
    scale_y_continuous(labels = label_kb_mb)
  fileName <- paste0("size_barplot_", name, "_dTAG_vs_DMSO_", note, "_", diffCutoff)
  height <- 3
  width <- 4
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
  
  
  temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
    dplyr::filter(Anno2 %in% loopList,
                  updown_A485_DMSO %in% c("UP", "NO",  "DOWN"))
  p <- ggplot(temp, aes(x = updown_A485_DMSO, y = distance)) +
    geom_violin(aes(fill = updown_A485_DMSO)) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = updown_A485_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + ggtitle(note) +
    scale_y_continuous(labels = label_kb_mb)
  fileName <- paste0("size_barplot_", name, "_A485_vs_DMSO_", note, "_", diffCutoff)
  height <- 3
  width <- 4
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

create_loop_scatterplot_oe <- function(data, figDir, name, Anno2List, diffCutoff){
  data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% Anno2List)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
  num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
  num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
  num.all <- num.up + num.no + num.down
  perc.up <- round(num.up / num.all * 100, 2)
  perc.no <- round(num.no / num.all * 100, 2)
  perc.down <- round(num.down / num.all * 100, 2)
  
  ### Scatterplot
    minValue <- -4
  maxValue <- 5
  data$density <- get_density(data$log_obsexp_DMSO, data$log_obsexp_dTAG, n = 100)
  data <- data %>% dplyr::arrange(density)
  correlation <- cor(data$log_obsexp_DMSO, data$log_obsexp_dTAG)
  p1 <- ggplot(data, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) +
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
    annotate("text", x = minValue, y = maxValue, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
             color = "black", hjust = 0, size = 3) +
    annotate("text", x = minValue, y = maxValue-1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
             color = "black", hjust = 0, size = 3) +
    annotate("text", x = minValue, y = maxValue-2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
             color = "black", hjust = 0, size = 3) +
    theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5)) +
    annotate("text", x = minValue, y = maxValue, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

  
  
  num.up <- (summary(data$updown_A485_DMSO))["UP"]
  num.no <- (summary(data$updown_A485_DMSO))["NO"]
  num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
  num.all <- num.up + num.no + num.down
  perc.up <- round(num.up / num.all * 100, 2)
  perc.no <- round(num.no / num.all * 100, 2)
  perc.down <- round(num.down / num.all * 100, 2)
  
  data$density <- get_density(data$log_obsexp_DMSO, data$log_obsexp_A485, n = 100)
  data <- data %>% dplyr::arrange(density)
  correlation <- cor(data$log_obsexp_DMSO, data$log_obsexp_A485)
  p2 <- ggplot(data, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) +
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
    annotate("text", x = minValue, y = maxValue, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
             color = "black", hjust = 0, size = 3) +
    annotate("text", x = minValue, y = maxValue-1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
             color = "black", hjust = 0, size = 3) +
    annotate("text", x = minValue, y = maxValue-2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
             color = "black", hjust = 0, size = 3) +
    theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5)) +
    annotate("text", x = minValue, y = maxValue, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

  
  
  fileName <- paste0("scatterplot_", name, "_", diffCutoff)
  png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 5*1.5, height = 2.5*1.5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
}

make_diff_bedpe_oe <- function(data, name, Anno2List, outDir, diffCutoff){
  data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% Anno2List)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_UP_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_NO_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_DOWN_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_UP_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_NO_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_DOWN_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
}

Distance vs score plot

dTAG
OE
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))

# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_DMSO_logOE"), unique(data$Anno2), colorListLoop)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logOE"), unique(data$Anno2), colorListLoop)


# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
                               log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO) %>% 
  dplyr::select(distance, log_obsexp_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logOE"), unique(data$Anno2), colorListLoop)
OE, grouped
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))

temp <- data %>% dplyr::mutate(distance = start2 - start1,
                               log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO) %>% 
  dplyr::select(distance, log_obsexp_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")



temp <- temp %>% dplyr::mutate(Anno3 = case_when(Anno2 %in% c("S-X", "S-S") ~ "Structural",
                                                 Anno2 %in% c("P-P", "P-E", "E-E") ~ "Pure_Regulatory",
                                                 Anno2 %in% c("P-S", "P-X", "E-S", "E-X") ~ "Relaxed_Regulatory")) %>%
  dplyr::filter(!is.na(Anno3))

loopList <- rev(c("Structural", "Relaxed_Regulatory", "Pure_Regulatory"))
colorList <- rev(c(palette_3[["grey2"]], strong_green, darken(strong_green, amount = 0.5)))
avg_scores_long <- temp %>%
  group_by(distance, Anno3) %>%
  summarise(avg_score = mean(score, na.rm = TRUE)) %>%
  ungroup() 
avg_scores_long$Anno3 <- factor(avg_scores_long$Anno3, level = loopList)

p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno3)) + 
  geom_hline(yintercept = 0,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  geom_smooth(show.legend = TRUE, se = TRUE,
              size = lineThick*mmToLineUnit,
              lineend = "square",
              aes(fill = Anno3))  +
  theme_classic() + 
  scale_x_continuous(labels = label_kb_mb) +
  scale_color_manual(values = colorList) +
  scale_fill_manual(values = colorList) +
  labs(x = "Loop size",
       y = "Δ log2(obs/exp)",
       color = "Loop types", fill = "Loop types") +
  theme(
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +
  guides(
    color = guide_legend(
      keywidth = 0.5,  # Adjust the width of the legend color squares
      keyheight = 0.5  # Adjust the height of the legend color squares
    )
  )





fileName <- paste0("dist_vs_score_difflinePlot_grouped", name)

width <- panelSize(2.5)*mmToInch
height <- panelSize(1.25)*mmToInch
png(here(figDir, paste0(fileName, ".png")), 
    res = 600, units = "in", width = width, height = height)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), 
        width = width, height = height)
print(p4)
dev.off()
Observed
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
                  log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
                  log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))

# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obs_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_DMSO_logObs"), unique(data$Anno2), colorListLoop)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obs_dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logObs"), unique(data$Anno2), colorListLoop)


# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
                               log_obs_diff_dTAG_DMSO = log_obs_dTAG - log_obs_DMSO) %>% 
  dplyr::select(distance, log_obs_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logObs"), unique(data$Anno2), colorListLoop)
A485
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))

# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_A485_logOE"), unique(data$Anno2), colorListLoop)


# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
                               log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO) %>% 
  dplyr::select(distance, log_obsexp_diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_A485_logOE"), unique(data$Anno2), colorListLoop)

Get size distribution of differential loops

# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue = -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
  dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

diffCutoff <- 0.5
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "all"), unique(data$Anno2), diffCutoff)
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "reg"), c("P-P", "P-E", "E-E"), diffCutoff)
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "str"), c("S-S", "S-X"), diffCutoff)



create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "all",  unique(data$Anno2), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "reg",  c("P-P", "P-E", "E-E"), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "reg_1mb",  c("P-P", "P-E", "E-E"), diffCutoff, 1e6)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "str",  c("S-S", "S-X"), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "str_1mb",  c("S-S", "S-X"), diffCutoff, 1e6)

Splitting loops into diff bedpe

# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue = -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
  dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
diffCutoff <- 0.5
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "all"), unique(data$Anno2), consensusDir, diffCutoff)
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "pe-pe"), c("P-P", "P-E", "E-E"), consensusDir, diffCutoff)
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "str"), c("S-S", "S-X"), consensusDir, diffCutoff)

[2.27] Extracting TSS for deeptools

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

flankSize <- 1
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")

bed.1 <- gene.tb %>% dplyr::filter(ensembl %in% group1) %>% dplyr::select(c(1, 2, 3))
bed.2 <- gene.tb %>% dplyr::filter(ensembl %in% group2) %>% dplyr::select(c(1, 2, 3))

fwrite(bed.1, here(refDir, "TSS_binaryGroup1.bed"), sep = "\t", col.names = FALSE)
fwrite(bed.2, here(refDir, "TSS_binaryGroup2.bed"), sep = "\t", col.names = FALSE)



gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed"))

bed.1 <- gene.tb %>% dplyr::filter(V6 %in% group1)
bed.2 <- gene.tb %>% dplyr::filter(V6 %in% group2)

fwrite(bed.1, here(refDir, "TSS_binaryGroup1_gtf.bed"), sep = "\t", col.names = FALSE)
fwrite(bed.2, here(refDir, "TSS_binaryGroup2_gtf.bed"), sep = "\t", col.names = FALSE)

[2.28] Checking gene expression levels of binaryGroup

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- fread(here(refDir, "readCount.filtered.TPM.all.tsv")) %>% dplyr::mutate(
  group = case_when(ensembl %in% group1 ~ "group1",
                    ensembl %in% group2 ~ "group2",
                    TRUE ~ NA)
) %>%
  dplyr::filter(!is.na(group)) %>%
  dplyr::select(c(34, 1, 3, 4, 5, 14, 15, 34))

data <- data %>% mutate(average = rowMeans(across(colnames(data)[3:7])))

temp <- data %>% dplyr::select(group, average)


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$average
  distance2 <- (data %>% dplyr::filter(group ==group2) )$average
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pval <- convPvalue(getPvalWilcox(temp, "group1", "group2"))

ggplot(temp, aes(x = group, y = average)) + geom_boxplot() + scale_y_log10() +
  ylab("avgTPM") + theme_classic() +
  annotate("text", x = 1, y = 10000, label = pval)

[2.29] Checking overlap with Stripes

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

flankSize <- 1
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")

bed.1 <- gene.tb %>% dplyr::filter(ensembl %in% group1) %>% dplyr::select(c(1, 2, 3))
bed.2 <- gene.tb %>% dplyr::filter(ensembl %in% group2) %>% dplyr::select(c(1, 2, 3))


data <- fread(here("../..", "result", "stripenn", "result_filtered.tsv"))

ggplot(data, aes(x = width)) + geom_histogram() + theme_classic() +
 scale_x_continuous(labels = label_kb_mb)


## Checking overlap with gene TSS

tss <- fread(here(refDir, "mm10_GRCm38.p6_TSS2.5kb.bed"))

tss.group1 <- tss %>% dplyr::filter(V6 %in% group1) %>% dplyr::select(V1, V2, V3)
colnames(tss.group1) <- c("chr", "start", "end")
tss.group1.gr <- makeGRangesFromDataFrame(tss.group1)

tss.group2 <- tss %>% dplyr::filter(V6 %in% group2) %>% dplyr::select(V1, V2, V3)
colnames(tss.group2) <- c("chr", "start", "end")
tss.group2.gr <- makeGRangesFromDataFrame(tss.group2)

## Anchor of stripes
temp <- data %>% dplyr::select(chr, pos1, pos2)
colnames(temp) <- c("chr", "start", "end")

stripeAnchor.gr <- makeGRangesFromDataFrame(temp)

overlap.group1 <- findOverlaps(tss.group1.gr, stripeAnchor.gr)
overlap.group2 <- findOverlaps(tss.group2.gr, stripeAnchor.gr)

length(unique(queryHits(overlap.group1)))
length(unique(queryHits(overlap.group2)))


## body of stripes
temp <- data %>% dplyr::select(chr, pos3, pos4)
colnames(temp) <- c("chr", "start", "end")

stripeBody.gr <- makeGRangesFromDataFrame(temp)

overlap.group1 <- findOverlaps(tss.group1.gr, stripeBody.gr)
overlap.group2 <- findOverlaps(tss.group2.gr, stripeBody.gr)

length(unique(queryHits(overlap.group1)))
length(unique(queryHits(overlap.group2)))

## Checking RAD21/CTCF presence at anchor
overlap.body.ctcf <- findOverlaps(stripeBody.gr, peak.CTCF)
overlap.body.rad21 <- findOverlaps(stripeBody.gr, peak.RAD21)
overlap.anchor.ctcf <- findOverlaps(stripeAnchor.gr, peak.CTCF)
overlap.anchor.rad21 <- findOverlaps(stripeAnchor.gr, peak.RAD21)

length(unique(queryHits(overlap.body.ctcf)))
length(unique(queryHits(overlap.body.rad21)))

length(unique(queryHits(overlap.anchor.ctcf)))
length(unique(queryHits(overlap.anchor.rad21)))
  
## Checking overlap with loop anchor (PE-PE)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)


overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)

length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))


## Checking overlap with loop anchor (ALL)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)


overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)

length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))


## Checking overlap with loop anchor (Structure)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)

overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)

length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))


length(unique(subjectHits(overlap.body.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.body.down)))/length(anchor.down)*100
length(unique(subjectHits(overlap.anchor.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.anchor.down)))/length(anchor.down)*100


## Checking overlap with loop anchor (RelaxedReg)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)

overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)

length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))


length(unique(subjectHits(overlap.body.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.body.down)))/length(anchor.down)*100
length(unique(subjectHits(overlap.anchor.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.anchor.down)))/length(anchor.down)*100

[2.30] Hubs from Dylan Paper

All loops

results <- tibble(i = numeric(), upno = numeric(), down = numeric())

for(i in seq(1, 10)){
  data <- fread(here(refDir, "Dylan_hub_esc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)

  
  overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
  overlap.down <- findOverlaps(anchor.down, hub.anchor)
  
  n1 <- length(unique(queryHits(overlap.upno)))
  n2 <- length(unique(queryHits(overlap.down)))
  
  perc1 <- round(n1/length(overlap.upno)*100, 2)
  perc2 <- round(n2/length(overlap.down)*100, 2)
  
  results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}



results_long <- results %>%
  pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")

results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))
# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "All loops",
    x = "Presence of hub anchor with >i connections",
    y = "Percentage"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)
Fishers
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.upno <- (bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))

getOverlapLoopNum <- function(loop, peak){
  anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
  anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
  a <- queryHits(findOverlaps(anchor1, peak))
  b <- queryHits(findOverlaps(anchor2, peak))
  return(length(unique(c(a, b))))
}

getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
  all.overlap <- getOverlapLoopNum(allLoop, peak)
  all.notOverlap <- nrow(allLoop) - all.overlap
  
  subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
  subset.notOverlap <- nrow(subsetLoop) - subset.overlap
  
  contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
                                all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
  colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
  rownames(contingency_table) <- c("All loops", "Subset loops")
  
  # Perform Fisher's Exact Test
  fisher_test_result <- fisher.test(contingency_table)
  return(fisher_test_result)
}

name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))


i <- 2
data <- fread(here(refDir, "Dylan_hub_esc.csv"))

data.hub <- data %>% dplyr::filter(all_lcon > i)
temp <- data.hub %>% dplyr::select(c(1, 2, 3))
colnames(temp) <- c("chr", "start", "end")
hub.anchor <- makeGRangesFromDataFrame(temp)


temp <- getSEOverlapFisher(loop, loop.upno, hub.anchor)
result.tb <- tibble(loopType = "UP/NO",
                    target = paste0("hub_", i),
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)
temp <- getSEOverlapFisher(loop, loop.down, hub.anchor)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target =  paste0("hub_", i),
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)


for(i in c(5)){
  data <- fread(here(refDir, "Dylan_hub_esc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  temp <- getSEOverlapFisher(loop, loop.upno, hub.anchor)
  result.tb <- result.tb %>% add_row(loopType = "UP/NO",
                      target = paste0("hub_", i),
                      pvalue = temp$p.value,
                      oddsRatio = temp$estimate)
  temp <- getSEOverlapFisher(loop, loop.down, hub.anchor)
  result.tb <- result.tb %>% 
    add_row(loopType = "DOWN",
            target =  paste0("hub_", i),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  
}

library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
  pivot_wider(names_from = loopType, values_from = pvalue) %>%
  column_to_rownames(var = "target")

# col_fun <- colorRamp2(c(0, 1, 2), 
#                       c("blue", "white", "red"))

data$target <- factor(data$target, levels = c("hub_5", "hub_2"))
data$loopType <- factor(data$loopType, levels = c("UP/NO", "DOWN"))
p <- ggplot(data, aes(x = loopType, y = target, size = -log10(pvalue), fill = oddsRatio)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
  scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"),  # Define gradient colors
                      values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5), 
                      #low = "white", high = "#CB333A",
                      #                  limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )


fileName <- here(figDir, "heatmap_hub_enrichment_dotplot_all")
width <- panelSize(1.65)*mmToInch
height <- panelSize(1.1)*mmToInch
# # png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# # print(p)
# # dev.off()
svglite(paste0(fileName, ".svg"),  height = height, width = width)
print(p)
dev.off()

PE-PE loops

results <- tibble(i = numeric(), upno = numeric(), down = numeric())

for(i in seq(1, 10)){
  data <- fread(here(refDir, "Dylan_hub_esc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)


  
  overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
  overlap.down <- findOverlaps(anchor.down, hub.anchor)
  
  n1 <- length(unique(queryHits(overlap.upno)))
  n2 <- length(unique(queryHits(overlap.down)))
  
  perc1 <- round(n1/length(overlap.upno)*100, 2)
  perc2 <- round(n2/length(overlap.down)*100, 2)
  
  results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}



results_long <- results %>%
  pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")
results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))

# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "PE-PE loops",
    x = "Presence of hub anchor with >i connections",
    y = "Percentage"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)

Str loops

results <- tibble(i = numeric(), upno = numeric(), down = numeric())

for(i in seq(1, 10)){
  data <- fread(here(refDir, "Dylan_hub_esc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)

  
  overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
  overlap.down <- findOverlaps(anchor.down, hub.anchor)
  
  n1 <- length(unique(queryHits(overlap.upno)))
  n2 <- length(unique(queryHits(overlap.down)))
  
  perc1 <- round(n1/length(overlap.upno)*100, 2)
  perc2 <- round(n2/length(overlap.down)*100, 2)
  
  results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}



results_long <- results %>%
  pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")
results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))

# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Structure loops",
    x = "Presence of hub anchor with >i connections",
    y = "Percentage"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)

TSS

results <- tibble(i = numeric(), perc1 = numeric(), perc2 = numeric())

for(i in seq(1, 10)){
  data <- fread(here(refDir, "Dylan_hub_episc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  
  ## Checking overlap with gene TSS
  tss <- fread(here(refDir, "mm10_GRCm38.p6_TSS2.5kb.bed"))
  
  tss.group1 <- tss %>% dplyr::filter(V6 %in% group1) %>% dplyr::select(V1, V2, V3)
  colnames(tss.group1) <- c("chr", "start", "end")
  tss.group1.gr <- makeGRangesFromDataFrame(tss.group1)
  
  tss.group2 <- tss %>% dplyr::filter(V6 %in% group2) %>% dplyr::select(V1, V2, V3)
  colnames(tss.group2) <- c("chr", "start", "end")
  tss.group2.gr <- makeGRangesFromDataFrame(tss.group2)
  
  
  overlap.group1 <- findOverlaps(tss.group1.gr, hub.anchor)
  overlap.group2 <- findOverlaps(tss.group2.gr, hub.anchor)
  
  n1 <- length(unique(queryHits(overlap.group1)))
  n2 <- length(unique(queryHits(overlap.group2)))
  
  perc1 <- round(n1/nrow(tss.group1)*100, 2)
  perc2 <- round(n2/nrow(tss.group2)*100, 2)
  
  results <- results %>% add_row(i = i, perc1 = perc1, perc2 = perc2)
}



results_long <- results %>%
  pivot_longer(cols = c(perc1, perc2), names_to = "Group", values_to = "Percentage")

# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Percentage by 'i' for perc1 and perc2",
    x = "Presence of hub anchor with >i connections",
    y = "Percentage"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("perc1" = "blue", "perc2" = "red")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

###[2.30] H3K27ac peak number per 10kb

process_group <- function(group_name, gene_data, bins) {
  # Filter for the specific group
  TSS_group <- makeGRangesFromDataFrame(
    gene_data %>% filter(ensembl %in% get(group_name)), 
    keep.extra.columns = TRUE
  )
  
  # Process each TSS in the group
  all_results <- map_dfr(seq_len(length(TSS_group)), function(i) {
    # Find overlapping bins
    overlapping_bins <- findOverlaps(TSS_group[i], bins)
    bins_near_tss <- as_tibble(bins[subjectHits(overlapping_bins)])
    
    # Calculate distance bin
    TSScenter <- (as_tibble(TSS_group[i]) %>%
                    mutate(center = (start + end) / 2))$center
    centerBinStart <- floor(TSScenter / 10e3) * 10e3 + 1
    bins_near_tss <- bins_near_tss %>%
      mutate(distanceBin = abs((start - centerBinStart)) / 10e3)
    
    # Summarize results
    result <- bins_near_tss %>%
      group_by(distanceBin) %>%
      summarise(mean_peak_counts = mean(peak_counts, na.rm = TRUE), .groups = "drop") %>%
      mutate(gene = TSS_group[i]$ensembl)
    return(result)
  })
  
  # Calculate mean and SD for the group
  mean_data <- all_results %>%
    group_by(distanceBin) %>%
    summarise(mean_peak_counts = mean(mean_peak_counts, na.rm = TRUE))
  
  sd_data <- all_results %>%
    group_by(distanceBin) %>%
    summarise(sd_peak_counts = sd(mean_peak_counts, na.rm = TRUE))
  
  # Join and add group name
  summary_data <- left_join(mean_data, sd_data, by = "distanceBin") %>%
    mutate(group = group_name)
  
  return(summary_data)
}

##########################################################



## Import peak
refDir <- here("../..", "reference")
#peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K27ac <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
#peak.H3K27ac <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))

## Import 10kb bin
temp <- fread(here(refDir, "mm10.bin.10kb.bed"))
colnames(temp) <- c("chr", "start", "end")
temp <- temp %>% dplyr::mutate(start = start+1)
bins.10kb <- makeGRangesFromDataFrame(temp)

## Count overlap
counts <- countOverlaps(bins.10kb, peak.H3K27ac)
mcols(bins.10kb)$peak_counts <- counts

## Getting TSS
flankSize <- 1e6
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

#TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
#TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)

##########################################################
# List of groups to process
groups <- c("gene.group1", "gene.group2")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2")

# Plot the results
p <- ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 0.5) +
  labs(
    x = "Distance bin (10kb)",
    y = "H3K4me3 peak count"
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS),
    legend.position = c(0.8, 0.8)  # Moves legend inside the plot (x, y relative coordinates)
  ) +
  guides(
    fill = guide_legend(
      keywidth = 0.2,  # Adjust the width of the legend keys
      keyheight = 0.2  # Adjust the height of the legend keys
    ))+
  scale_color_manual(values = rev(c("#777777", "#F28E2C")))

##########################################################

fileName <- paste0("binPeakDensity_RAD21_binaryGroup")
width <- 32*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene

# List of groups to process
groups <- c("psOver0", "psOver1", "psOver2", "psOver3", "psOver4")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver0") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver1") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver2") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver3")%>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver4")

# Plot the results
p <- ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 0.5) +
  labs(
    x = "Distance bin (10kb)",
    y = "H3K4me3 peak count"
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS),
    legend.position = c(0.8, 0.8)  # Moves legend inside the plot (x, y relative coordinates)
  ) +
  guides(
    fill = guide_legend(
      keywidth = 0.2,  # Adjust the width of the legend keys
      keyheight = 0.2  # Adjust the height of the legend keys
    ))+
  scale_color_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))

fileName <- paste0("binPeakDensity_RAD21_psGroup11")
width <- 32*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

### PE
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene

# List of groups to process
groups <- c("peOver0", "peOver1", "peOver2", "peOver3", "peOver4")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver0") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver1") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver2") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver3")%>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver4")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) + xlim(0, 10)+
  labs(
    title = "Mean Peak Counts",
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

###############
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene

# List of groups to process
groups <- c("ppOver0", "ppOver1", "ppOver2", "ppOver3", "ppOver4")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver0") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver1") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver2") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver3")%>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver4")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) +
  labs(
    title = "Mean Peak Counts",
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

[2.31] Testing SEs?

peak.Whyte.SE <- importPeak(here(refDir, "superEnhancer_Whyte_ESC_mm10.bed"))
peak.Dylan.SE <- importPeak(here(refDir, "superEnhancer_Dylan_ESC.bed"))



flankSize <- 0.5e6
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)


length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Whyte.SE))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, peak.Whyte.SE))))/length(gene.group2)*100

length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, peak.Dylan.SE))))/length(gene.group2)*100

#View(as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Whyte.SE)))]))
#View(as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE)))]))

gene.group1.SE <- (as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE)))]))$ensembl
gene.group1.noSE <- gene.group1[!(gene.group1 %in% gene.group1.SE)]

overlaps <- findOverlaps(TSS1mb.group2.gr, peak.Dylan.SE)
unique_hits <- unique(queryHits(overlaps))
gene.group2.SE <- as_tibble(TSS1mb.group2.gr[unique_hits])$ensembl
gene.group2.noSE <- as_tibble(TSS1mb.group2.gr[-unique_hits])$ensembl


##########################################################
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - 1e6,
                TSSend = TSS + 1e6) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")

##########################################################
# List of groups to process
groups <- c("gene.group1.SE", "gene.group1.noSE")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.SE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.noSE")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) +
  labs(
    title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

##########################################################
# List of groups to process
groups <- c("gene.group1.SE", "gene.group1.noSE", "gene.group2.SE", "gene.group2.noSE")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
    add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.SE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.noSE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.SE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.noSE")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) +
  labs(
    title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )
##########################################################
# List of groups to process
groups <- c("gene.group2.SE", "gene.group2.noSE")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.SE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.noSE")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) +
  labs(
    title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

## Checking P-S in the subset of genes

resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

temp3 <- temp2 %>% dplyr::filter(gene %in% c(gene.group1)) %>%
  dplyr::mutate(group = case_when(gene %in% gene.group1.SE ~ "SE",
                                  TRUE ~ "noSE"))


ggplot(temp3, aes(x = group, y = num_ps)) + geom_violin(aes(fill = group)) + geom_boxplot(width = 0.1) + theme_classic()

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$num_pp
  distance2 <- (data %>% dplyr::filter(group ==group2) )$num_pp
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


getPvalWilcox(temp3, "noSE", "SE")

[2.32] Checking directionality

name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff)%>% 
  dplyr::mutate(distance = start2 - start1)


geneAnnoData <- geneAnnoData %>% unnest(gene)


gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, TSS)
colnames(gene.tb) <- c("ensembl", "TSS")


geneAnnoData  <- geneAnnoData %>% dplyr::left_join(gene.tb, by = c("gene" = "ensembl"))

data <- geneAnnoData %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 29, 31, 35)) %>%
  rowwise() %>%
  dplyr::mutate(distToAnchor1 = (start1 + end1)/2 - TSS,
                distToAnchor2 = (start2 + end2)/2 - TSS,
                distToAnchor = if_else(abs(distToAnchor1) > abs(distToAnchor2), distToAnchor1, distToAnchor2),
                direction = if_else(distToAnchor > 0, "right", "left"))

data.directionality <- data %>% group_by(gene) %>%
  summarise(count = n(),
            n_right = sum(direction == "right"),
            n_left = sum(direction == "left")) %>%
  dplyr::filter(count > 1) %>%
  dplyr::mutate(n = n_right + n_left,
                directionality = abs((n_right - n_left)/n))


#ggplot(data.directionality, aes(x = directionality)) + geom_histogram() + theme_classic()


gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

temp <- data.directionality %>% dplyr::filter(gene %in% c(gene.group1, gene.group2)) %>%
  dplyr::mutate(group = if_else(gene %in% gene.group1, "group1", "group2"))


ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) + 
  geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-S") +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")


#########################PS

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$directionality
  distance2 <- (data %>% dplyr::filter(group ==group2) )$directionality
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
#####################

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene

temp <- data.directionality %>% dplyr::filter(gene %in% c(psOver0, psOver1, psOver2, psOver3, psOver4)) %>%
  dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
                                  gene %in% psOver1 ~ "psOver1",
                                  gene %in% psOver2 ~ "psOver2",
                                  gene %in% psOver3 ~ "psOver3",
                                  gene %in% psOver4 ~ "psOver4"))

p34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
p23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
p12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
p01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
p04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)



ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) + 
  geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
                                                "p23: ", convPvalue(p23), "\n",
                                                "p12: ", convPvalue(p12), "\n",
                                                "p01: ", convPvalue(p01), "\n",
                                                "p04: ", convPvalue(p04), "\n"),
           color = "black", hjust = 0, size = 3)


#################### pe

peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene

temp <- data.directionality %>% dplyr::filter(gene %in% c(peOver0, peOver1, peOver2, peOver3, peOver4)) %>%
  dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
                                  gene %in% peOver1 ~ "peOver1",
                                  gene %in% peOver2 ~ "peOver2",
                                  gene %in% peOver3 ~ "peOver3",
                                  gene %in% peOver4 ~ "peOver4"))

p34 <- round(getPvalWilcox(temp, "peOver3", "peOver4"), 5)
p23 <- round(getPvalWilcox(temp, "peOver2", "peOver3"), 5)
p12 <- round(getPvalWilcox(temp, "peOver1", "peOver2"), 5)
p01 <- round(getPvalWilcox(temp, "peOver0", "peOver1"), 5)
p04 <- round(getPvalWilcox(temp, "peOver0", "peOver4"), 5)



ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) + 
  geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
                                                "p23: ", convPvalue(p23), "\n",
                                                "p12: ", convPvalue(p12), "\n",
                                                "p01: ", convPvalue(p01), "\n",
                                                "p04: ", convPvalue(p04), "\n"),
           color = "black", hjust = 0, size = 3)


#################### pp

ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene

temp <- data.directionality %>% dplyr::filter(gene %in% c(ppOver0, ppOver1, ppOver2, ppOver3, ppOver4)) %>%
  dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
                                  gene %in% ppOver1 ~ "ppOver1",
                                  gene %in% ppOver2 ~ "ppOver2",
                                  gene %in% ppOver3 ~ "ppOver3",
                                  gene %in% ppOver4 ~ "ppOver4"))

p34 <- round(getPvalWilcox(temp, "ppOver3", "ppOver4"), 5)
p23 <- round(getPvalWilcox(temp, "ppOver2", "ppOver3"), 5)
p12 <- round(getPvalWilcox(temp, "ppOver1", "ppOver2"), 5)
p01 <- round(getPvalWilcox(temp, "ppOver0", "ppOver1"), 5)
p04 <- round(getPvalWilcox(temp, "ppOver0", "ppOver4"), 5)



ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) + 
  geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
                                                "p23: ", convPvalue(p23), "\n",
                                                "p12: ", convPvalue(p12), "\n",
                                                "p01: ", convPvalue(p01), "\n",
                                                "p04: ", convPvalue(p04), "\n"),
           color = "black", hjust = 0, size = 3)

[2.33] Checking guidance

name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff)%>% 
  dplyr::mutate(distance = start2 - start1)

geneAnnoData <- geneAnnoData %>% unnest(gene)

gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, TSS)
colnames(gene.tb) <- c("ensembl", "TSS")


geneAnnoData  <- geneAnnoData %>% dplyr::left_join(gene.tb, by = c("gene" = "ensembl"))

data <- geneAnnoData %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 29, 31, 35)) %>%
  rowwise() %>%
  dplyr::mutate(distToAnchor1 = (start1 + end1)/2 - TSS,
                distToAnchor2 = (start2 + end2)/2 - TSS,
                distToAnchor = if_else(abs(distToAnchor1) > abs(distToAnchor2), distToAnchor1, distToAnchor2),
                direction = if_else(distToAnchor > 0, "right", "left"))


result <- data %>%
  group_by(gene, direction) %>%
  mutate(distToAnchor = abs(distToAnchor)) %>%
  mutate(max_dist_P_S = ifelse(any(Anno2 == "P-S"), 
                               max(distToAnchor[Anno2 == "P-S"], na.rm = TRUE), 
                               NA)) %>%
  mutate(is_smaller = ifelse(is.na(max_dist_P_S), NA, 
                             ifelse(distToAnchor == max_dist_P_S, NA, distToAnchor < max_dist_P_S))) %>%
  ungroup()



result2 <- result %>% dplyr::filter(!is.na(is_smaller))

result3 <- result2 %>% group_by(gene, direction) %>%
  summarize(count_true = sum(is_smaller, na.rm = TRUE),
            count_false = sum(is_smaller == FALSE, na.rm = TRUE)) %>%
  dplyr::mutate(count = count_true + count_false,
                perc = count_true/count*100) %>%
  dplyr::select(gene, direction, perc)

result3 <- result3 %>% group_by(gene) %>% summarize(perc = mean(perc))

####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
                                            gene %in% psOver1 ~ "psOver1",
                                            gene %in% psOver2 ~ "psOver2",
                                            gene %in% psOver3 ~ "psOver3",
                                            gene %in% psOver4 ~ "psOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

####### DOing this for P-E groups
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
                                            gene %in% peOver1 ~ "peOver1",
                                            gene %in% peOver2 ~ "peOver2",
                                            gene %in% peOver3 ~ "peOver3",
                                            gene %in% peOver4 ~ "peOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

####### DOing this for P-P groups

ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
                                            gene %in% ppOver1 ~ "ppOver1",
                                            gene %in% ppOver2 ~ "ppOver2",
                                            gene %in% ppOver3 ~ "ppOver3",
                                            gene %in% ppOver4 ~ "ppOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

result <- data %>%
  group_by(gene, direction) %>%
  mutate(distToAnchor = abs(distToAnchor)) %>%
  mutate(max_dist_P_S = ifelse(any(Anno2 == "P-S"), 
                               max(distToAnchor[Anno2 == "P-S"], na.rm = TRUE), 
                               NA))
result <- result %>% dplyr::filter(Anno2 != "P-S") %>%
  mutate(is_smaller = ifelse(is.na(max_dist_P_S), NA, 
                             ifelse(distToAnchor == max_dist_P_S, NA, distToAnchor < max_dist_P_S))) %>%
  ungroup()



result2 <- result %>% dplyr::filter(!is.na(is_smaller))

result3 <- result2 %>% group_by(gene, direction) %>%
  summarize(count_true = sum(is_smaller, na.rm = TRUE),
            count_false = sum(is_smaller == FALSE, na.rm = TRUE)) %>%
  dplyr::mutate(count = count_true + count_false,
                perc = count_true/count*100) %>%
  dplyr::select(gene, direction, perc)

result3 <- result3 %>% group_by(gene) %>% summarize(perc = mean(perc))

####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
                                            gene %in% psOver1 ~ "psOver1",
                                            gene %in% psOver2 ~ "psOver2",
                                            gene %in% psOver3 ~ "psOver3",
                                            gene %in% psOver4 ~ "psOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

####### DOing this for P-E groups
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
                                            gene %in% peOver1 ~ "peOver1",
                                            gene %in% peOver2 ~ "peOver2",
                                            gene %in% peOver3 ~ "peOver3",
                                            gene %in% peOver4 ~ "peOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

####### DOing this for P-P groups

ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
                                            gene %in% ppOver1 ~ "ppOver1",
                                            gene %in% ppOver2 ~ "ppOver2",
                                            gene %in% ppOver3 ~ "ppOver3",
                                            gene %in% ppOver4 ~ "ppOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

[2.34] Checking CTCF motif orientation

Calculating CTCF motif

peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))

library(BSgenome.Mmusculus.UCSC.mm10)

# Extract sequences
sequences <- getSeq(BSgenome.Mmusculus.UCSC.mm10, peak.CTCF)


# Get the CTCF motif (or provide your own PWM)
ctcf_motif <- query(MotifDb, c("CTCF", "Mmusculus"))[[1]]

# Scan for the motif
motif_hits <- lapply(sequences, function(seq) {
    matchPWM(ctcf_motif, seq, min.score = "80%") # Adjust min.score as needed
})

motif_hits_reverse <- lapply(sequences, function(seq) {
    matchPWM(ctcf_motif, reverseComplement(seq), min.score = "80%")
})

# Combine results
result <- mapply(function(fwd, rev) {
    list(forward = fwd, reverse = rev)
}, motif_hits, motif_hits_reverse, SIMPLIFY = FALSE)


result[[5]]


result_tibble <- tibble(
    peak_id = seq_along(result), # Create an identifier for each peak
    alignment = map_chr(result, function(res) {
        has_fwd <- length(res$forward) > 0
        has_rev <- length(res$reverse) > 0
        
        if (has_fwd && has_rev) "fwdrev"
        else if (has_fwd) "fwd"
        else if (has_rev) "rev"
        else "none"
    })
)

temp <- as_tibble(peak.CTCF) %>% bind_cols(result_tibble) %>%
  dplyr::select(seqnames, start, end, alignment)

fwrite(temp, here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"), col.names = FALSE, sep = "\t")

Comparing it to TSS

ctcf.peak <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcf.peak) <- c("chr", "start", "end", "motif")
ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak, keep.extra.columns = TRUE)


flankSize <- 1
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V4, V5, V6)
colnames(gene.TSS.tb) <- c("chr", "start", "end", "strand", "gene", "ensembl")

tss.gr <- makeGRangesFromDataFrame(gene.TSS.tb, keep.extra.columns = TRUE)

fwd_peaks <- ctcf.peak.gr[ctcf.peak.gr$motif %in% c("fwd", "fwdrev")]
rev_peaks <- ctcf.peak.gr[ctcf.peak.gr$motif %in% c("rev", "fwdrev")]

# Find convergent CTCF pairs for each TSS
convergent_pairs <- lapply(seq_along(tss.gr), function(i) {
    current_tss <- tss.gr[i]
    
    # Filter peaks on the same chromosome as the TSS
    fwd_peaks_chr <- fwd_peaks[as.character(seqnames(fwd_peaks)) == as.character(seqnames(current_tss))]
    rev_peaks_chr <- rev_peaks[as.character(seqnames(rev_peaks)) == as.character(seqnames(current_tss))]
    
    # Find the closest forward peak to the left of the TSS
    left_fwd <- fwd_peaks_chr[start(fwd_peaks_chr) < start(current_tss)]
    closest_fwd <- if (length(left_fwd) > 0) {
      left_fwd[which.max(start(left_fwd))]
    } else {
      NA
    }
    
    # Find the closest reverse peak to the right of the TSS
    right_rev <- rev_peaks_chr[start(rev_peaks_chr) > start(current_tss)]
    closest_rev <- if (length(right_rev) > 0) {
      right_rev[which.min(start(right_rev))]
    } else {
      NA
    }
    
    # Combine results if both closest peaks exist
    if (!is.na(closest_fwd) && !is.na(closest_rev)) {
        tibble(
          chrom1 = as.character(seqnames(closest_fwd)),
          start1 = start(closest_fwd),
          end1 = end(closest_fwd),
          chrom2 = as.character(seqnames(closest_rev)),
          start2 = start(closest_rev),
          end2 = end(closest_rev),
          ensembl = current_tss$ensembl,
        )
    } else {
        NULL
    }
})

# Combine results into a tibble
result_tibble <- bind_rows(convergent_pairs)

fwrite(result_tibble, here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"), sep = "\t", col.names = FALSE)
fwrite(gene.TSS.tb %>% dplyr::select(c(1, 2, 3, 5)), here(refDir, "tss_2bp.bed"), sep = "\t", col.names = FALSE)

temp <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe")) %>% dplyr::filter(V7 == "Klf4")

fwrite(as_tibble(temp), here(refDir, "tss_convergent_ctcf_motif_pairs_klf4.bedpe"), sep = "\t", col.names = FALSE)

# Checking size distribution of ctcf pair
temp <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"))
temp <- temp %>% dplyr::mutate(size = (V6 + V5)/2 - (V3 + V2)/2,
                       id = paste(V1, V2, V3, V5, V6, sep = "_"))
temp <- temp %>% dplyr::select(id, size) %>% distinct()

ggplot(temp, aes(x = size)) + geom_histogram() + scale_x_continuous(labels = label_kb_mb, limits = c(0, 1e6)) + scale_y_log10() + theme_classic() + ggtitle("size distribution of smallest convergent ctcf pairs around gene")

Filtering loops that fall within CTCF boundaries

flankSize <- 1
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V4, V5, V6)
colnames(gene.TSS.tb) <- c("chr", "start", "end", "strand", "gene", "ensembl")
geneEnsemblPair <- gene.TSS.tb %>% dplyr::select(gene, ensembl)

boundary.pair <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"))
colnames(boundary.pair) <- c("boundary_chrom1", "boundary_start1", "boundary_end1", "boundary_chrom2", "boundary_start2", "boundary_end2", "ensembl")
boundary.pair <- boundary.pair %>% dplyr::left_join(geneEnsemblPair, by = c("ensembl"))


# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E")) %>%
  dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 24, 29, 31)) %>% unnest(gene)


data <- geneAnnoData %>% dplyr::left_join(boundary.pair, by = c("gene" = "ensembl"))

data <- data %>% rowwise() %>% dplyr::mutate(isWithinLeftBd = (boundary_start1 <= min(end1, end2)),
                       isWithinRightBd = (boundary_end2 >= max(start1, start2)),
                       isWithinBd = isWithinLeftBd & isWithinRightBd)
data <- data %>% drop_na()


ggplot(data, aes(x = DMSO, y = dTAG)) + geom_point() + coord_fixed() +  
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  theme_classic() + facet_wrap(~ isWithinBd)

ggplot(data, aes(x = isWithinBd, y = diff_dTAG_DMSO, fill = isWithinBd)) + geom_violin() + geom_boxplot(width = 0.1) + theme_classic() +
  geom_hline(yintercept = 0)


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(isWithinBd ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(isWithinBd ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

getPvalWilcox(data, TRUE, FALSE)


# Proportion of those gene between group1 and group2
data.isWithinBd <- data %>% dplyr::filter(isWithinBd)

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

sum(gene.group1 %in% data.isWithinBd$gene)/length(gene.group1)*100

sum(gene.group2 %in% data.isWithinBd$gene)/length(gene.group2)*100

# Checking size of those loops
data <- data %>% dplyr::mutate(distance = start2 - start1)

ggplot(data, aes(x = isWithinBd, y = distance, fill = isWithinBd)) + geom_violin() + geom_boxplot(width = 0.1) + theme_classic()

[2.35] Checking CTCF motif presence at gene TSS

flankSize <- 2.5e3
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)


ctcf.peak <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcf.peak) <- c("chr", "start", "end", "motif")
ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak)


print("+-2.5kb TSS overla with CTCF peak")
length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, ctcf.peak.gr))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, ctcf.peak.gr))))/length(gene.group2)*100



ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak %>% dplyr::filter(motif != "none"))
print("+-2.5kb TSS overla with CTCF peak with motif")
length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, ctcf.peak.gr))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, ctcf.peak.gr))))/length(gene.group2)*100

[2.36] Finding farthest P-S

name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X")) %>%
  dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 11, 12, 24, 29, 31)) %>% unnest(gene)


gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "TSS")


geneAnnoData <- geneAnnoData %>% dplyr::left_join(gene.TSS.tb, by = c("gene" = "ensembl"))

geneAnnoData <- geneAnnoData %>% 
  dplyr::mutate(distance1 = (start1 + end1)/2 - TSS,
                distance2 = (start2 + end2)/2 - TSS,
                distance = if_else(abs(distance1) > abs(distance2), distance1, distance2))

result <- geneAnnoData %>%
  filter(Anno2 == "P-S") %>%
  group_by(gene) %>%
  summarise(
    farRight = if (any(distance > 0)) max(distance[distance > 0]) else 0,  # Return 0 if no positive distance
    farLeft = if (any(distance < 0)) min(distance[distance < 0]) else 0  # Return 0 if no negative distance
  ) %>%
  ungroup()

geneAnnoData <- geneAnnoData %>% left_join(result, by = c("gene"))

geneAnnoData <- geneAnnoData %>% dplyr::filter(Anno2 %in% c("P-P", "P-E")) %>%
  dplyr::mutate(group = case_when(is.na(farRight) ~ "No",
                                  (distance > farLeft) & (distance < farRight) ~ "Within",
                                  TRUE ~ "Outside"),
                size = start2 - start1)

data <- geneAnnoData %>% dplyr::select(group, id, size) %>% distinct()
anchor.data <- geneAnnoData %>% dplyr::select(chrom1, start1, end1, chrom2, start2, end2, group) %>% distinct()

temp1 <- anchor.data %>% dplyr::filter(group == "No") %>% dplyr::select(-group)
fwrite(temp1, here(consensusDir, "insulated_domain_ps_no.bedpe"), sep = "\t", col.names = FALSE)
temp2 <- anchor.data %>% dplyr::filter(group == "Within") %>% dplyr::select(-group)
fwrite(temp2, here(consensusDir, "insulated_domain_ps_within.bedpe"), sep = "\t", col.names = FALSE)
temp3 <- anchor.data %>% dplyr::filter(group == "Outside") %>% dplyr::select(-group)
fwrite(temp3, here(consensusDir, "insulated_domain_ps_outside.bedpe"), sep = "\t", col.names = FALSE)


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$size
  distance2 <- (data %>% dplyr::filter(group ==group2) )$size
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))


p <- ggplot(data, aes(x = group, y = size, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) +
  scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Loop size")

fileName <- paste0("insulationBoundary_size")
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
  
########################
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(group ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

data <- geneAnnoData %>% dplyr::select(group, id, diff_dTAG_DMSO) %>% distinct()

# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))

p <- ggplot(data, aes(x = group, y = diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Δ loop score")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  geom_hline(yintercept = -0.2,
             alpha = 0.5, 
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square", linetype = "dashed") +
  coord_cartesian(ylim = c(-0.8, 0.5))
    
fileName <- paste0("insulationBoundary_delta")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  
  
  
 ###################### 
    
    minValue <- -4
  obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

data <- geneAnnoData %>% dplyr::select(group, id, log_obsexp_diff_dTAG_DMSO) %>% distinct()


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$log_obsexp_diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(group ==group2) )$log_obsexp_diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))
# 

p <- ggplot(data, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "log2(fc of obs/exp)")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  coord_cartesian(ylim = c(-2, 2))
    
fileName <- paste0("insulationBoundary_obsexp")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  





#####################


gene.withinBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "withinBoundary"))$gene)
gene.outsideBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "outsideBoundary"))$gene)
gene.noBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "noBoundary"))$gene)

gene.wBoundary <- unique(c(gene.withinBoundary, gene.outsideBoundary))

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

print("perc of gene with loop within Boundary")
sum(gene.group1 %in% gene.withinBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.withinBoundary)/length(gene.group2)*100

print("perc of gene with loop outside Boundary")
sum(gene.group1 %in% gene.outsideBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.outsideBoundary)/length(gene.group2)*100

print("perc of gene with loop with no Boundary")
sum(gene.group1 %in% gene.noBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.noBoundary)/length(gene.group2)*100


diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name) %>%
  dplyr::mutate(group = case_when(
    ensembl_gene_id %in% gene.wBoundary ~ "withinBoundary",
    ensembl_gene_id %in% gene.noBoundary ~ "noBoundary",
    TRUE ~ NA
  )) %>%
  dplyr::filter(!is.na(group)) %>%
  dplyr::mutate(absLog2FC = abs(log2FoldChange))


ggplot(diff.RNA, aes(x = group, y = log2FoldChange)) + geom_violin() + geom_boxplot(width = 0.1)

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$absLog2FC
  distance2 <- (data %>% dplyr::filter(group ==group2) )$absLog2FC
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
convPvalue(getPvalWilcox(diff.RNA, "withinBoundary", "noBoundary"))


# Create the CDF plot
p <- ggplot(diff.RNA, aes(x = absLog2FC, color = group)) +
stat_ecdf(size = 0.4 ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Absolute log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 0.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )

fileName <- paste0("log2FC_cdf_insulationBoundary")
width <- panelSize(2.5)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



### Checking characterstics of anchors
temp1 <- anchor.data %>% dplyr::filter(group == "noBoundary") %>% dplyr::select(-group)
anchor.noBoundary <- extractAnchor(temp1)

temp2 <- anchor.data %>% dplyr::filter(group == "withinBoundary") %>% dplyr::select(-group)
anchor.withinBoundary <- extractAnchor(temp2)

temp3 <- anchor.data %>% dplyr::filter(group == "outsideBoundary") %>% dplyr::select(-group)
anchor.outsideBoundary <- extractAnchor(temp3)


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(loop ==group1) )$sumScore
  distance2 <- (data %>% dplyr::filter(loop ==group2) )$sumScore
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

getSumScores <- function(track, anchor) {
  # Find overlaps between all anchors and track regions at once
  overlaps <- findOverlaps(anchor, track)
  
  # Extract the scores and corresponding anchor indices
  anchor_indices <- queryHits(overlaps)
  track_scores <- score(track)[subjectHits(overlaps)]
  
  # Use tapply to calculate the median scores for each anchor
  median_scores <- tapply(track_scores, anchor_indices, mean, na.rm = TRUE)
  
  # Initialize a numeric vector to store the median scores for each anchor
  all_median_scores <- rep(NA, length(anchor))
  
  # Populate the median scores for the anchors that have overlaps
  all_median_scores[as.numeric(names(median_scores))] <- median_scores
  
  return(all_median_scores)
}

plotSumScoresBinary <- function(track, peak, name, anchor1, anchor2, anchor3){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  a <- getSumScores(peakTrack, anchor1)
  b <- getSumScores(peakTrack, anchor2)
  c <- getSumScores(peakTrack, anchor3)
  a.tb <- tibble(loop = "noBoundary",
                 sumScore = a)
  b.tb <- tibble(loop = "withBoundary",
                 sumScore = b)
  c.tb <- tibble(loop = "outsideBoundary",
                 sumScore = c)
  
  data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
  data$loop <- factor(data$loop, levels = c("noBoundary", "withBoundary", "outsideBoundary"))
  
  p12 <- getPvalWilcox(data, "noBoundary", "withBoundary")
  p13 <- getPvalWilcox(data, "noBoundary", "outsideBoundary")
  p23 <- getPvalWilcox(data, "withBoundary", "outsideBoundary")
  p <-  ggplot(data, aes(x = loop, y = sumScore)) + 
    labs(x = NULL, y = paste0(name, "\nlog(avg peak score per anchor)")) +
    geom_violin(aes(fill = loop), color = "black",
                linewidth = lineThick * mmToLineUnit, lineend = "square",
                show.legend = FALSE) +
    geom_boxplot(width = 0.1, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.size = 1, outlier.stroke = NA) + theme_classic() +
    stat_summary(
      aes(group = loop), fun = mean,
      geom = "point", shape = 21, size = 1,
      fill = "red", color = "black"
    ) + theme(
      axis.title = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 45, hjust = 1, vjust = 1
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)
    ) + 
    #coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.99))) + 
    annotate(
      "text", x = 1, y = quantile(data$sumScore, 0.5),
      label = paste0("p12: ", convPvalue(p12), "\n",
                     "p13: ", convPvalue(p13), "\n",
                     "p23: ", convPvalue(p23)),
      color = "black", hjust = 0, size = 3
    ) 
  #+ scale_y_continuous(trans = "log10")
  
  fileName <- paste0("ChIP_peak_avgPeakScore_boundaryAnchor_", name)
  width <- panelSize(2)*mmToInch
  height <- panelSize(2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "H3K4me3", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)

track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
#plotSumScores(track, peak, "H3K27ac")
plotSumScoresBinary(track, peak, "H3K27ac", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)

track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
#plotSumScores(track, peak, "H3K14ac")
plotSumScoresBinary(track, peak, "H3K14ac", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)

track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "CTCF")
plotSumScoresBinary(track, peak, "CTCF", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)

track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "RAD21")
plotSumScoresBinary(track, peak, "RAD21", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)



######## LOLA
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

overlaps <- findOverlaps(anchor.noBoundary, atac.gr)
anchor.noBoundary <- pintersect(anchor.noBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
overlaps <- findOverlaps(anchor.withinBoundary, atac.gr)
anchor.withinBoundary <- pintersect(anchor.withinBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
overlaps <- findOverlaps(anchor.outsideBoundary, atac.gr)
anchor.outsideBoundary <- pintersect(anchor.outsideBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])



anchors <- list(anchor.noBoundary, anchor.withBoundary, anchor.outsideBoundary)
tbs <- list()
temps <- list()

alpha <- 0.05

# Process clusters c1 to c8
for (i in 1:3) {
  
  anchor <- anchors[[i]]
  # Run LOLA
  result <- runLOLA(anchor, anchor.all, lolaDB)
  tb <- as_tibble(result)
  
  # Filter and summarize
  tb <- tb %>%
    dplyr::mutate(target = toupper(antibody)) %>%
    filter(str_to_lower(cellType) == "embryonic stem cell") %>%
    dplyr::filter(qValue < alpha) %>%
    dplyr::group_by(target) %>%
    slice_min(meanRnk, with_ties = FALSE)
  
  # Store tb
  tbs[[i]] <- tb
  
  # Select and rename oddsRatio
  temp <- tb %>% dplyr::select(target, oddsRatio)
  colnames(temp) <- c("target", paste0("OR_c", i))
  
  # Store temp
  temps[[i]] <- temp
}

# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
  mutate_all(~replace_na(., 1))
colnames(temp) <- c("target", "noBoundary", "withinBoundary", "outsideBoundary")
data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

[3.37] Checking RAD21 level at tss

tss.5kb.group1 <- fread(here(refDir, "TSS_binaryGroup1.bed")) %>%
  dplyr::mutate(start = (V2 + V3)/2 - 2500,
                end = (V2 + V3)/2 + 2500,
                chr = V1) %>%
  dplyr::select(chr, start, end)
tss.5kb.group1 <- makeGRangesFromDataFrame(tss.5kb.group1)

tss.5kb.group2 <- fread(here(refDir, "TSS_binaryGroup2.bed")) %>%
  dplyr::mutate(start = (V2 + V3)/2 - 2500,
                end = (V2 + V3)/2 + 2500,
                chr = V1) %>%
  dplyr::select(chr, start, end)
tss.5kb.group2 <- makeGRangesFromDataFrame(tss.5kb.group2)


plotSumScoresBinary <- function(track, peak, name, anchor1, anchor2){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  a <- getSumScores(peakTrack, anchor1)
  b <- getSumScores(peakTrack, anchor2)
  a.tb <- tibble(loop = "group1",
                 sumScore = a)
  b.tb <- tibble(loop = "group2",
                 sumScore = b)
  
  data <- bind_rows(a.tb, b.tb) %>% drop_na()
  data$loop <- factor(data$loop, levels = c("group1", "group2"))
  
  p12 <- getPvalWilcox(data, "group1", "group2")
  p <-  ggplot(data, aes(x = loop, y = sumScore)) + 
    labs(x = NULL, y = paste0(name, "\nlog(avg peak score per anchor)")) +
    geom_violin(aes(fill = loop), color = "black",
                linewidth = lineThick * mmToLineUnit, lineend = "square",
                show.legend = FALSE) +
    geom_boxplot(width = 0.1, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.size = 1, outlier.stroke = NA) + theme_classic() +
    stat_summary(
      aes(group = loop), fun = mean,
      geom = "point", shape = 21, size = 1,
      fill = "red", color = "black"
    ) + theme(
      axis.title = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 45, hjust = 1, vjust = 1
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)
    ) + 
    #coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.99))) + 
    annotate(
      "text", x = 1, y = quantile(data$sumScore, 0.5),
      label = paste0("p12: ", convPvalue(p12)),
      color = "black", hjust = 0, size = 3
    ) 
  #+ scale_y_continuous(trans = "log10")
  
  fileName <- paste0("ChIP_peak_avgPeakScore_binaryGroup_", name)
  width <- panelSize(2)*mmToInch
  height <- panelSize(2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# Sum peak score
####
track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "CTCF", tss.5kb.group1, tss.5kb.group2)

track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "RAD21", tss.5kb.group1, tss.5kb.group2)

[3.38] Stripe percentage

# Create the data
data <- data.frame(
  Group = c("Group1", "Group2"),
  Value = c(31.03, 10.64)
)

# Create the bar plot
p <- ggplot(data, aes(x = Group, y = Value, fill = Group)) + 
  geom_bar(stat = "identity", color = "black", width = 0.7, alpha = 0.6, show.legend = FALSE,
           linewidth = lineThick * mmToLineUnit, lineend = "square") + # Use identity for raw values
  labs(y = "% of TSS on stripe") + 
  theme_classic() +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- paste0("perc_on_stripe_binaryGroup")
width <- panelSize(1.25)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

###[3.39] Splitting bedpe

temp <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>% dplyr::select(c(1, 2))
geneList <- (temp %>% dplyr::filter(external_gene_name %in% c("Klf4", "Tbx3", "Jun", "Fosl2", "Myc", "Phlda1")))$ensembl_gene_id

# LOAD ANNOTATED LOOP
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
  dplyr::mutate(distance = start2 - start1)

filteredLoop <- geneAnnoData %>% unnest(gene) %>% dplyr::filter(gene %in% geneList)
fwrite(filteredLoop, here(consensusDir, paste0("P-N_selectedGene.bedpe")), 
         sep = "\t", col.names = FALSE)
  
temploop <- filteredLoop %>% dplyr::filter(Anno2 %in% c("P-P", "P-E"))
fwrite(temploop, here(consensusDir, paste0("P-PE_selectedGene.bedpe")), 
         sep = "\t", col.names = FALSE)
  
temploop <- filteredLoop %>% dplyr::filter(Anno2 %in% c("P-S"))
fwrite(temploop, here(consensusDir, paste0("P-S_selectedGene.bedpe")), 
         sep = "\t", col.names = FALSE)

[3.40] Finding largest encompassing S-S

name <- "chromo_cons_annoHierarchy"

# IMPORT S-S loops
allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
strLoops <- allLoops %>% dplyr::filter(Anno2 %in% c("S-S"))
temp <- strLoops %>% dplyr::select(c(1, 2, 6))
colnames(temp) <- c("chrom", "start", "end")
strLoopsGr <- makeGRangesFromDataFrame(temp)

# IMPORT TSS
flankSize <- 10
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chrom", "start", "end", "ensembl", "gene")
geneGr <- makeGRangesFromDataFrame(gene.tb, keep.extra.columns = TRUE)


# Find overlaps between loops and TSS
overlaps <- findOverlaps(geneGr, strLoopsGr)

# Annotate overlaps
tss_with_loops <- geneGr[queryHits(overlaps)]
loops_with_tss <- strLoopsGr[subjectHits(overlaps)]

# Combine into a data frame for processing
loop_data <- data.frame(gene = tss_with_loops$ensembl,
                        loop_chr = seqnames(loops_with_tss),
                        loop_start = start(loops_with_tss),
                        loop_end = end(loops_with_tss),
                        loop_width = width(loops_with_tss))

# Identify the largest loop for each gene
largest_loops <- loop_data[order(loop_data$gene, -loop_data$loop_width), ]
largest_loops <- largest_loops[!duplicated(largest_loops$gene), ]
largest_loops <- largest_loops %>% dplyr::mutate(loopID = paste(loop_chr, loop_start, loop_end, sep = "_"))
largest_loops <- largest_loops[order(largest_loops$loop_chr, largest_loops$loop_start),]
rownames(largest_loops) <- NULL

# Checking smaller loops
allLoopsGr <- makeGRangesFromDataFrame(allLoops, 
                                       seqnames.field = "chrom1", 
                                       start.field = "start1", 
                                       end.field = "end2")
largestLoopsGr <- makeGRangesFromDataFrame(largest_loops, 
                                           seqnames.field = "loop_chr", 
                                           start.field = "loop_start", 
                                           end.field = "loop_end")
largestLoopsGr <- sort(unique(largestLoopsGr))
complete_overlaps <- subsetByOverlaps(allLoopsGr, largestLoopsGr, type = "within")
overlaps <- findOverlaps(complete_overlaps, largestLoopsGr)


completeOverlapDf <- as.data.frame(complete_overlaps) %>% dplyr::mutate(loopIDAll = paste(seqnames, start, end, sep = "_"))
largestLoopsDf <- as.data.frame(largestLoopsGr) %>% dplyr::mutate(loopID = paste(seqnames, start, end, sep = "_"))

loopIDpairs <- data.frame(loopID = completeOverlapDf$loopIDAll[queryHits(overlaps)],
                          loopID2 = largestLoopsDf$loopID[subjectHits(overlaps)])

allLoopsAnnotated <- allLoops %>% dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>% dplyr::left_join(loopIDpairs, by = c("loopID"))

temp <- allLoopsAnnotated %>% dplyr::filter(Anno2 %in% c("P-E", "P-P", "E-E")) %>% dplyr::mutate(hasSS = !is.na(loopID2))
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(hasSS ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(hasSS ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, TRUE, FALSE))
ggplot(temp, aes(x = hasSS, y = diff_dTAG_DMSO)) + geom_violin(aes(fill = hasSS), show.legend = FALSE) + 
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(aes(group = hasSS), fun = mean, 
               geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme_classic() +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
  ggtitle("PE-PE") + geom_hline(yintercept = 0)


# Counting loop types
allLoopsAnnotatedFiltered <- allLoopsAnnotated %>% dplyr::filter(!is.na(loopID2)) %>%
  dplyr::filter(!(loopID == loopID2))
temp <- allLoopsAnnotatedFiltered %>% dplyr::select(loopID2, Anno2)
result <- temp %>%
  group_by(loopID2, Anno2) %>%
  summarise(count = n(), .groups = 'drop') %>%
  group_by(loopID2) %>%
  mutate(frequency = count / sum(count)) %>%
  arrange(loopID2, Anno2)

result <- largest_loops %>% dplyr::left_join(result, by = c("loopID" = "loopID2"))

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(result, by = c("gene"))
data <- data %>% filter(complete.cases(.))

all_anno2 <- unique(data$Anno2)

# Calculate average frequency of Anno2 for each gene in each group
result <- data %>%
  # Ensure all possible Anno2 values are present for each gene and group
  complete(gene, Anno2 = all_anno2, fill = list(frequency = 0)) %>% 
  dplyr::mutate(group = case_when(
    gene %in% gene.group1 ~ "group1",
    gene %in% gene.group2 ~ "group2",
    TRUE ~ NA
  )) %>%
  dplyr::select(group, Anno2, frequency)
  

ggplot(result, aes(x = Anno2, fill = group, y = frequency)) + geom_boxplot()
# View results
ggplot(largest_loops, aes(x = loop_width)) + geom_histogram()


### Checking how many genes from each group has encompassing S-S
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
  hasSS = ifelse(is.na(loop_width), "NO", "YES")
)

data_summary <- data %>%
  group_by(group) %>%
  summarize(
    total = n(),
    hasSS_yes = sum(hasSS == "YES"),
    percentage_yes = (hasSS_yes / total) * 100
  )

ggplot(data_summary, aes(x = group, y = percentage_yes, fill = group)) +
  geom_bar(stat = "identity", color = "black") +
  labs(
    title = "Percentage of Each Group with hasSS = YES",
    x = "Group",
    y = "Percentage (%)"
  ) + ylim(0, 100) +
  theme_minimal()


temp <- data %>% dplyr::filter(hasSS == "YES")
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$loop_width
  distance2 <- (data %>% dplyr::filter(group ==group2) )$loop_width
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, "group1", "group2"))

ggplot(temp, aes(x = group, y = loop_width)) + geom_violin() + geom_boxplot(width = 0.5) +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black")

### Comparing RNA perturbation

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(c(1, 4, 5)) %>%
  dplyr::mutate(hasSS = ifelse(ensembl_gene_id %in% largest_loops$gene, "hasSS", "noSS"))


ggplot(diff.RNA, aes(x = abs(log2FoldChange), color = hasSS)) +
  stat_ecdf(size = 0.4) +
  labs(
    x = "Absolute log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )


### Checking ChIP peak density
largest_loopsGr <- makeGRangesFromDataFrame(largest_loops, 
                                            seqnames.field = "loop_chr", 
                                            start.field = "loop_start", 
                                            end.field = "loop_end", 
                                            keep.extra.columns = TRUE)

# Find overlaps between ChIP-seq peaks and the largest loops
chip_overlaps <- findOverlaps(largest_loopsGr, peak.CTCF)

# Count the number of ChIP-seq peaks per loop
loop_peak_counts <- table(queryHits(chip_overlaps))

# Create a data frame with the counts and loop details
largest_loops$density <- 0  # Initialize density column
largest_loops$count <- 0    # Initialize peak count column

# Add peak counts to the corresponding loops
largest_loops[as.numeric(names(loop_peak_counts)), "count"] <- as.integer(loop_peak_counts)

# Calculate the density (peaks per kilobase)
largest_loops$density <- largest_loops$count / ((largest_loops$loop_width-1) / 1000)


data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
  hasSS = ifelse(is.na(loop_width), "NO", "YES")
) %>% dplyr::filter(hasSS == "YES")

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$density
  distance2 <- (data %>% dplyr::filter(group ==group2) )$density
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv <- convPvalue(getPvalWilcox(data, "group1", "group2"))

ggplot(data, aes(x = group, y = density)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(aes(group = group), fun = mean, 
               geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme_classic() +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
  ggtitle("CTCF")

[3.41] Finding largest convergent encompassing S-S

Filtering convergent S-S loops

name <- "chromo_cons_annoHierarchy"

# IMPORT S-S LOOPS
allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
strLoops <- allLoops %>% dplyr::filter(Anno2 %in% c("S-S"))
temp <- strLoops %>% dplyr::select(c(1, 2, 3, 5, 6))  # Assuming columns include anchor positions
colnames(temp) <- c("chrom", "start1", "end1", "start2", "end2")
strLoopsGr1 <- makeGRangesFromDataFrame(
  temp %>% dplyr::select(chrom, start = start1, end = end1)
)
strLoopsGr2 <- makeGRangesFromDataFrame(
  temp %>% dplyr::select(chrom, start = start2, end = end2)
)

# IMPORT CTCF MOTIF ANNOTATION
ctcfMotifs <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcfMotifs) <- c("chrom", "start", "end", "motif")
ctcfGr <- makeGRangesFromDataFrame(ctcfMotifs, keep.extra.columns = TRUE)

# Annotate each loop anchor with CTCF motifs
anchor1_with_ctcf <- findOverlaps(strLoopsGr1, ctcfGr)
anchor2_with_ctcf <- findOverlaps(strLoopsGr2, ctcfGr)

# Get motif information for anchor1 overlaps
anchor1_ctcf_motif <- rep(NA, length(strLoopsGr1))  # Initialize motif vector
anchor1_ctcf_motif[queryHits(anchor1_with_ctcf)] <- ctcfGr[subjectHits(anchor1_with_ctcf)]$motif

# Get motif information for anchor2 overlaps
anchor2_ctcf_motif <- rep(NA, length(strLoopsGr2))  # Initialize motif vector
anchor2_ctcf_motif[queryHits(anchor2_with_ctcf)] <- ctcfGr[subjectHits(anchor2_with_ctcf)]$motif

# Add motif information to strLoops
strLoops <- strLoops %>%
  mutate(
    anchor1_motif = anchor1_ctcf_motif,
    anchor2_motif = anchor2_ctcf_motif
  )

# Filter for convergent CTCF interactions (forward in anchor1, reverse in anchor2)
convergent_loops <- strLoops %>%
  filter(anchor1_motif == "fwd" & anchor2_motif == "rev") %>% dplyr::filter(Anno2 %in% c("S-S"))

Processing

# Extract loop boundary from left to right
temp <- convergent_loops %>% dplyr::select(c(1, 2, 6))
colnames(temp) <- c("chrom", "start", "end")
strLoopsGr <- makeGRangesFromDataFrame(temp)

# IMPORT TSS
flankSize <- 10
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chrom", "start", "end", "ensembl", "gene")
geneGr <- makeGRangesFromDataFrame(gene.tb, keep.extra.columns = TRUE)


# Find overlaps between loops and TSS
overlaps <- findOverlaps(geneGr, strLoopsGr)

# Annotate overlaps
tss_with_loops <- geneGr[queryHits(overlaps)]
loops_with_tss <- strLoopsGr[subjectHits(overlaps)]

# Combine into a data frame for processing
loop_data <- data.frame(gene = tss_with_loops$ensembl,
                        loop_chr = seqnames(loops_with_tss),
                        loop_start = start(loops_with_tss),
                        loop_end = end(loops_with_tss),
                        loop_width = width(loops_with_tss))

# Identify the largest loop for each gene
largest_loops <- loop_data[order(loop_data$gene, -loop_data$loop_width), ]
largest_loops <- largest_loops[!duplicated(largest_loops$gene), ]
largest_loops <- largest_loops %>% dplyr::mutate(loopID = paste(loop_chr, loop_start, loop_end, sep = "_"))
largest_loops <- largest_loops[order(largest_loops$loop_chr, largest_loops$loop_start),]
rownames(largest_loops) <- NULL

# Checking smaller loops
allLoopsGr <- makeGRangesFromDataFrame(allLoops, 
                                       seqnames.field = "chrom1", 
                                       start.field = "start1", 
                                       end.field = "end2")
largestLoopsGr <- makeGRangesFromDataFrame(largest_loops, 
                                           seqnames.field = "loop_chr", 
                                           start.field = "loop_start", 
                                           end.field = "loop_end")
largestLoopsGr <- sort(unique(largestLoopsGr))
complete_overlaps <- subsetByOverlaps(allLoopsGr, largestLoopsGr, type = "within")
overlaps <- findOverlaps(complete_overlaps, largestLoopsGr)


completeOverlapDf <- as.data.frame(complete_overlaps) %>% dplyr::mutate(loopIDAll = paste(seqnames, start, end, sep = "_"))
largestLoopsDf <- as.data.frame(largestLoopsGr) %>% dplyr::mutate(loopID = paste(seqnames, start, end, sep = "_"))

loopIDpairs <- data.frame(loopID = completeOverlapDf$loopIDAll[queryHits(overlaps)],
                          loopID2 = largestLoopsDf$loopID[subjectHits(overlaps)])

# allLoopsAnnotated <- allLoops %>% dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>% dplyr::left_join(loopIDpairs, by = c("loopID"))


##################################
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X")) %>% unnest(gene) %>%
  dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_"))
temp <- geneAnnoData %>% dplyr::left_join(loopIDpairs, by = c("loopID"))
temp2 <- temp %>% dplyr::select(gene, loopID2) %>%
  group_by(gene) %>%              # Group data by the 'gene' column
  summarize(geneHasSS = !all(is.na(loopID2)))  # Check if all loopID2 values for each gene are NA
temp <- temp %>% left_join(temp2, by = c("gene"))
data.all <- temp %>% dplyr::mutate(group = ifelse(!geneHasSS, "No",
                                              ifelse(is.na(loopID2), "Outside", "Within")),
                               size = start2 - start1)

##################################
outfile <- data.all %>% dplyr::select(loopID2, gene) %>% dplyr::filter(!is.na(loopID2)) %>%
  separate(loopID2, into = c("chrom", "start", "end"), sep = "_", convert = TRUE)
# fwrite(outfile, here(consensusDir, "convergent_ss_domain.bed"), sep = "\t", col.names = FALSE)

Plotting

data <- data.all %>% dplyr::filter(Anno2 %in% c("P-E"))

temp <- data %>% dplyr::select(chrom1, start1, end1, chrom2, start2, end2, group) %>% distinct()

temp1 <- temp %>% dplyr::filter(group == "No") %>% dplyr::select(-group)
fwrite(temp1, here(consensusDir, "insulated_domain_ss_no.bedpe"), sep = "\t", col.names = FALSE)
temp2 <- temp %>% dplyr::filter(group == "Within") %>% dplyr::select(-group)
fwrite(temp2, here(consensusDir, "insulated_domain_ss_within.bedpe"), sep = "\t", col.names = FALSE)
temp3 <- temp %>% dplyr::filter(group == "Outside") %>% dplyr::select(-group)
fwrite(temp3, here(consensusDir, "insulated_domain_ss_outside.bedpe"), sep = "\t", col.names = FALSE)

temp <- data %>% dplyr::select(id, group, size) %>% distinct()


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$size
  distance2 <- (data %>% dplyr::filter(group ==group2) )$size
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))


p <- ggplot(temp, aes(x = group, y = size, fill = group)) + 
  geom_violin(linewidth = lineMedium* mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
                 linewidth = lineMedium * mmToLineUnit, lineend = "square",
                 outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
  stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Loop size")

fileName <- paste0("insulationBoundary_size_convSS_PE")
width <-33*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(group ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

temp <- data %>% dplyr::select(group, id, diff_dTAG_DMSO) %>% distinct()

convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))

p <- ggplot(data, aes(x = group, y = diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() +   
    scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Δ loop score") +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  geom_hline(yintercept = -0.2,
             alpha = 0.5, 
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square", linetype = "dashed") +
  coord_cartesian(ylim = c(-0.8, 0.5))
    
fileName <- paste0("insulationBoundary_delta_convSS_pe")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  


###################### 

minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

data2 <- data %>% left_join(obsexp, by = c("id"))

temp <- data2 %>% dplyr::select(group, id, log_obsexp_diff_dTAG_DMSO) %>% distinct()


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$log_obsexp_diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(group ==group2) )$log_obsexp_diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))


p <- ggplot(temp, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineMedium* mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
  
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "log2(fc of obs/exp)") +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  coord_cartesian(ylim = c(-2, 2))
    width <-30*mmToInch
height <- 35*mmToInch
fileName <- paste0("insulationBoundary_obsexp_convSS_pe")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  





#####################

data <- data.all
gene.withinBoundary <- unique((data %>% dplyr::filter(group == "Within"))$gene)
gene.outsideBoundary <- unique((data %>% dplyr::filter(group == "Outside"))$gene)
gene.noBoundary <- unique((data %>% dplyr::filter(group == "No"))$gene)

gene.wBoundary <- unique(c(gene.withinBoundary, gene.outsideBoundary))

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

print("perc of gene with loop within Boundary")
sum(gene.group1 %in% gene.withinBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.withinBoundary)/length(gene.group2)*100

print("perc of gene with loop outside Boundary")
sum(gene.group1 %in% gene.outsideBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.outsideBoundary)/length(gene.group2)*100

print("perc of gene with loop with no Boundary")
sum(gene.group1 %in% gene.noBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.noBoundary)/length(gene.group2)*100


diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name) %>%
  dplyr::mutate(group = case_when(
    ensembl_gene_id %in% gene.wBoundary ~ "withinBoundary",
    ensembl_gene_id %in% gene.noBoundary ~ "noBoundary",
    TRUE ~ NA
  )) %>%
  dplyr::filter(!is.na(group)) %>%
  dplyr::mutate(absLog2FC = abs(log2FoldChange))
ks_result <- ks.test(
  diff.RNA %>% dplyr::filter(group == "withinBoundary") %>% pull(absLog2FC),
  diff.RNA %>% dplyr::filter(group == "noBoundary") %>% pull(absLog2FC)
)

ggplot(diff.RNA, aes(x = group, y = log2FoldChange)) + geom_violin() + geom_boxplot(width = 0.1)

# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(group ==group1) )$absLog2FC
#   distance2 <- (data %>% dplyr::filter(group ==group2) )$absLog2FC
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# convPvalue(getPvalWilcox(diff.RNA, "withBoundary", "noBoundary"))
# 

# Create the CDF plot
p <- ggplot(diff.RNA, aes(x = absLog2FC, color = group)) +
  scale_color_manual(values = (c("#777777", "#F28E2C"))) +

stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square"  ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )

fileName <- paste0("log2FC_cdf_insulationBoundary_convSS_pe")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
temp <- allLoopsAnnotated %>% dplyr::filter(Anno2 %in% c("P-E")) %>% dplyr::mutate(hasSS = !is.na(loopID2))
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(hasSS ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(hasSS ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, TRUE, FALSE))
ggplot(temp, aes(x = hasSS, y = diff_dTAG_DMSO)) + geom_violin(aes(fill = hasSS), show.legend = FALSE) + 
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(aes(group = hasSS), fun = mean, 
               geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme_classic() +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
  ggtitle("PE") + geom_hline(yintercept = 0)




# Counting loop types
allLoopsAnnotatedFiltered <- allLoopsAnnotated %>% dplyr::filter(!is.na(loopID2)) %>%
  dplyr::filter(!(loopID == loopID2))
temp <- allLoopsAnnotatedFiltered %>% dplyr::select(loopID2, Anno2)
result <- temp %>%
  group_by(loopID2, Anno2) %>%
  summarise(count = n(), .groups = 'drop') %>%
  group_by(loopID2) %>%
  mutate(frequency = count / sum(count)) %>%
  arrange(loopID2, Anno2)

result <- largest_loops %>% dplyr::left_join(result, by = c("loopID" = "loopID2"))

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(result, by = c("gene"))
data <- data %>% filter(complete.cases(.))

all_anno2 <- unique(data$Anno2)

# Calculate average frequency of Anno2 for each gene in each group
result <- data %>%
  # Ensure all possible Anno2 values are present for each gene and group
  complete(gene, Anno2 = all_anno2, fill = list(frequency = 0)) %>% 
  dplyr::mutate(group = case_when(
    gene %in% gene.group1 ~ "group1",
    gene %in% gene.group2 ~ "group2",
    TRUE ~ NA
  )) %>%
  dplyr::select(group, Anno2, frequency)
  

ggplot(result, aes(x = Anno2, fill = group, y = frequency)) + geom_boxplot()



# View results
ggplot(largest_loops, aes(x = loop_width)) + geom_histogram()


### Checking how many genes from each group has encompassing S-S
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
  hasSS = ifelse(is.na(loop_width), "NO", "YES")
)

data_summary <- data %>%
  group_by(group) %>%
  summarize(
    total = n(),
    hasSS_yes = sum(hasSS == "YES"),
    percentage_yes = (hasSS_yes / total) * 100
  )

ggplot(data_summary, aes(x = group, y = percentage_yes, fill = group)) +
  geom_bar(stat = "identity", color = "black") +
  labs(
    title = "Percentage of Each Group with hasSS = YES",
    x = "Group",
    y = "Percentage (%)"
  ) + ylim(0, 100) +
  theme_minimal()

temp <- data %>% dplyr::filter(hasSS == "YES")
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$loop_width
  distance2 <- (data %>% dplyr::filter(group ==group2) )$loop_width
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, "group1", "group2"))

ggplot(temp, aes(x = group, y = loop_width)) + geom_violin() + geom_boxplot(width = 0.5) +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black")


### Comparing RNA perturbation

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(c(1, 4, 5)) %>%
  dplyr::mutate(hasSS = ifelse(ensembl_gene_id %in% largest_loops$gene, "hasSS", "noSS"))


ggplot(diff.RNA, aes(x = abs(log2FoldChange), color = hasSS)) +
  stat_ecdf(size = 0.4) +
  labs(
    x = "Absolute log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )



### Checking ChIP peak density
largest_loopsGr <- makeGRangesFromDataFrame(largest_loops, 
                                            seqnames.field = "loop_chr", 
                                            start.field = "loop_start", 
                                            end.field = "loop_end", 
                                            keep.extra.columns = TRUE)

# Find overlaps between ChIP-seq peaks and the largest loops
chip_overlaps <- findOverlaps(largest_loopsGr, peak.H3K4me3)

# Count the number of ChIP-seq peaks per loop
loop_peak_counts <- table(queryHits(chip_overlaps))

# Create a data frame with the counts and loop details
largest_loops$density <- 0  # Initialize density column
largest_loops$count <- 0    # Initialize peak count column

# Add peak counts to the corresponding loops
largest_loops[as.numeric(names(loop_peak_counts)), "count"] <- as.integer(loop_peak_counts)

# Calculate the density (peaks per kilobase)
largest_loops$density <- largest_loops$count / ((largest_loops$loop_width-1) / 1000)


data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
  hasSS = ifelse(is.na(loop_width), "NO", "YES")
) %>% dplyr::filter(hasSS == "YES")

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$density
  distance2 <- (data %>% dplyr::filter(group ==group2) )$density
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv <- convPvalue(getPvalWilcox(data, "group1", "group2"))

ggplot(data, aes(x = group, y = density)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(aes(group = group), fun = mean, 
               geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme_classic() +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
  ggtitle("H3K4me3")

[3.42] Comparing to Reg loops to A485

name <- "chromo_cons_annoHierarchy"
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))
# UP NO
loop.upno <- bind_rows(loop.up, loop.no)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))


allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>%
  dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>%
  dplyr::mutate(group = case_when(
    loopID %in% loop.upno$loopID ~ "UP&NO",
    loopID %in% loop.down$loopID ~ "DOWN",
    TRUE ~ NA
  )) %>%
  dplyr::filter(!is.na(group))

minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

allLoops <- allLoops %>% left_join(obsexp, by = c("id"))
allLoops$group <- factor(allLoops$group, levels = c("UP&NO", "DOWN"))


### Plotting
p1 <- ggplot(allLoops, aes(x = group, y = diff_A485_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "grey50")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Δ loop score")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  geom_hline(yintercept = -0.2,
             alpha = 0.5, 
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square", linetype = "dashed") +
  coord_cartesian(ylim = c(-0.5, 0.5))
    

####
p2 <- ggplot(allLoops, aes(x = group, y = log_obsexp_diff_A485_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "grey50")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "log2(fc of obs/exp)")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  coord_cartesian(ylim = c(-2, 2))
  


#############

### Plotting
p3 <- ggplot(allLoops, aes(x = group, y = diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "grey50")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Δ loop score")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  geom_hline(yintercept = -0.2,
             alpha = 0.5, 
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square", linetype = "dashed") +
  coord_cartesian(ylim = c(-0.5, 0.5))
    

####
p4 <- ggplot(allLoops, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "grey50")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "log2(fc of obs/exp)")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  coord_cartesian(ylim = c(-2, 2))
  
width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_dTAG")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p1, p2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(p1, p2))
dev.off()  

width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_A485")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p3, p4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(p3, p4))
dev.off()  

width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_loopScore")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p1, p3))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(p1, p3))
dev.off()  

width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_log2fc")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p2, p4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(p2, p4))
dev.off()  

[3.43] Checking RAD21 peak density at group1 and group2 promoter


peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))

##########################################################
flankSize <- 5000
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V6, V5, V1, TSSstart, TSSend)
colnames(gene.tb) <- c("ensembl_gene_id", "external_gene_name", "chr", "start", "end")


# Convert gene.tb to a GRanges object
gene_gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$start, end = gene.tb$end),
  gene_id = gene.tb$ensembl_gene_id
)

# Count overlaps between peaks and genes
overlap_counts <- countOverlaps(gene_gr, peak.RAD21)

# Add overlap counts to the original gene.tb data
gene.tb$peak_count <- overlap_counts

###############################################################################

peakNum0 <- (gene.tb %>% dplyr::filter(peak_count == 0))$ensembl_gene_id
peakNum1 <- (gene.tb %>% dplyr::filter(peak_count == 1))$ensembl_gene_id
peakNum2 <- (gene.tb %>% dplyr::filter(peak_count == 2))$ensembl_gene_id
peakNum3 <- (gene.tb %>% dplyr::filter(peak_count == 3))$ensembl_gene_id
peakNumOver4 <- (gene.tb %>% dplyr::filter(peak_count >= 4))$ensembl_gene_id


###############################################################################
name <- "chromo_cons_annoHierarchy"

alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))


## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  pnOver = ifelse(gene %in% peakNum0, "peakNum0",
                  ifelse(gene %in% peakNum1, "peakNum1",
                         ifelse(gene %in% peakNum2, "peakNum2",
                                ifelse(gene %in% peakNum3, "peakNum3",
                                       ifelse(gene %in% peakNumOver4, "peakNumOver4", NA)))))) %>%
  drop_na(pnOver)

###############################################################################
# Checking percentage of genes with RAD21 peaks
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

aaa <- temp %>% dplyr::mutate(group = case_when(
  gene %in% group1 ~ "group1",
  gene %in% group2 ~ "group2",
  TRUE ~ NA
)) %>%
  dplyr::select(group, pnOver, gene) %>%
  dplyr::filter(group %in% c("group1", "group2")) %>%
  dplyr::mutate(hasPeak = ifelse(pnOver == "peakNum0", FALSE, TRUE))

bbb <- aaa %>% dplyr::filter(group == "group1") 

sum(bbb$hasPeak)
bbb <- aaa %>% dplyr::filter(group == "group2") 
sum(bbb$hasPeak)

aaa_summary <- aaa %>%
  group_by(group, pnOver) %>%
  summarize(count = n(), .groups = "drop")

# Create the stacked barplot
aaa_summary <- aaa %>%
  group_by(group, pnOver) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(group) %>%
  mutate(ratio = count / sum(count))

# Define the gradient colors
gradient_colors <- c("#D4D4D4", "#DCB0AF", "#E48D8A", "#EC6965", "#F44641")
# gradient_colors <- c("#D4D4D4", "#A2BBCA", "#71A2C0", "#3F89B7", "#0E71AD")

# Create the stacked barplot with gradient colors
p <- ggplot(aaa_summary, aes(x = group, y = ratio, fill = pnOver)) +
  geom_bar(stat = "identity", position = "stack",
           linewidth = lineMedium * mmToLineUnit, lineend = "square") +
  theme_classic() + labs(x = NULL , y = "Ratio") +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +
  scale_fill_manual(values = gradient_colors, 
                    guide = guide_legend(
      override.aes = list(size = 0.5), # Adjust legend key symbol size
      keywidth = 2 / 2.54, # Convert 2mm to cm
      keyheight = 2 / 2.54 # Convert 2mm to cm
    )) 

width <- panelSize(1.7)*mmToInch
height <- panelSize(1.3)*mmToInch
fileName <- paste0("rad21perc_promoter")
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  
loop score
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

ps01 <- round(getPvalWilcox(temp, "peakNum0", "peakNum1"), 5)
ps12 <- round(getPvalWilcox(temp, "peakNum1", "peakNum2"), 5)
ps23 <- round(getPvalWilcox(temp, "peakNum2", "peakNum3"), 5)
ps34 <- round(getPvalWilcox(temp, "peakNum3", "peakNumOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "peakNum2", "peakNumOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "peakNum1", "peakNumOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "peakNum0", "peakNumOver4"), 5)


p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) + 
  geom_violin(aes(fill = pnOver), 
              color = "black",
              linewidth = lineMedium * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
  stat_summary(
    aes(group = pnOver), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) +
    scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
  coord_cartesian(ylim = c(-0.5, 0.1))



fileName <- paste0("diffScore_barplot_RAD21peakWithin10kb_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-38*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
log2FC
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = pnOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_psGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

[3.44] Checking RAD21 peak density at group1 and group2 promoter


peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


##########################################################
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-E"))

temp1 <- geneAnnoData %>% dplyr::filter(A1 == "E") %>% dplyr::select(chrom1, start1, end1, gene)
colnames(temp1) <- c("chr", "start", "end", "gene")
temp2 <- geneAnnoData %>% dplyr::filter(A2 == "E") %>% dplyr::select(chrom2, start2, end2, gene)
colnames(temp2) <- c("chr", "start", "end", "gene")

enhAnchors <- bind_rows(temp1, temp2) %>% unnest(gene) %>%
  dplyr::mutate(
    group = case_when(
      gene %in% group1 ~ "Grp1",
      gene %in% group2 ~ "Grp2",
      TRUE ~ NA
    )
  ) %>% dplyr::filter(!is.na(group))

enh <- makeGRangesFromDataFrame(enhAnchors %>% dplyr::select(c(1, 2, 3)))

# Count overlaps between peaks and genes
overlap_counts <- countOverlaps(enh, peak.RAD21)

# Add overlap counts to the original gene.tb data
enhAnchors$peak_count <- overlap_counts

enhAnchors <- enhAnchors %>% dplyr::mutate(
  peakGroup = case_when(
    peak_count == 0 ~ "peakNum0",
    peak_count == 1 ~ "peakNum1",
    peak_count == 2 ~ "peakNum2",
    peak_count == 3 ~ "peakNum3",
    peak_count >= 4 ~ "peakNumOver4"
  )
)

aaa <- enhAnchors %>% 
  dplyr::select(group, peakGroup, gene) %>%
  dplyr::mutate(hasPeak = ifelse(peakGroup == "peakNum0", FALSE, TRUE))

bbb <- aaa %>% dplyr::filter(group == "Grp1") 
sum(bbb$hasPeak)/nrow(bbb)
bbb <- aaa %>% dplyr::filter(group == "Grp2") 
sum(bbb$hasPeak)/nrow(bbb)


aaa_summary <- aaa %>%
  group_by(group, peakGroup) %>%
  summarize(count = n(), .groups = "drop")

# Create the stacked barplot
aaa_summary <- aaa %>%
  group_by(group, peakGroup) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(group) %>%
  mutate(ratio = count / sum(count))

# Define the gradient colors
# gradient_colors <- c("#D4D4D4", "#DCB0AF", "#E48D8A", "#EC6965", "#F44641")
gradient_colors <- c("#D4D4D4", "#A2BBCA", "#71A2C0", "#3F89B7", "#0E71AD")

# Create the stacked barplot with gradient colors
p <- ggplot(aaa_summary, aes(x = group, y = ratio, fill = peakGroup)) +
  geom_bar(stat = "identity", position = "stack",
           linewidth = lineMedium * mmToLineUnit, lineend = "square") +
  theme_classic() + labs(x = NULL , y = "Ratio") +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +
  scale_fill_manual(values = gradient_colors, 
                    guide = guide_legend(
      override.aes = list(size = 0.5), # Adjust legend key symbol size
      keywidth = 2 / 2.54, # Convert 2mm to cm
      keyheight = 2 / 2.54 # Convert 2mm to cm
    )) 

width <- panelSize(1.7)*mmToInch
height <- panelSize(1.3)*mmToInch
fileName <- paste0("rad21perc_enh")
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  

NOT CONCLUSIVE BELOW HERE


data <- fread(here(resultDir, "chromHMM", "A485_pe-pe_anchors", "overlap_enrich_100_state.txt"))
colnames(data) <- c("state", "genome", "pe-pe_A485_down", "pe-pe_A485_no", "pe-pe_A485_up")
data <- data %>% dplyr::select(-genome) %>% as.matrix()

  column_to_rownames(var = "state") %>%
  as.matrix()

library(circlize)

col_fun <- colorRamp2(c(0, 1, 10), 
                      c("blue", "white", "red"))

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 10,                         # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

---
title: "R Notebook"
output: html_notebook
---
# LOADING REQUIREMENTS
## PACKAGES
```{r message=FALSE, warning=FALSE, include=FALSE}
# LIST OF PACKAGES
pkgs = c("tidyverse", "here", "svglite", "ggplot2", "ggrepel",
         "stringr", "BiocManager", "RColorBrewer", "viridis", "magick",
         "nlme", "factoextra", "devtools", "beepr", "remotes",
         "cowplot", "data.table", "strawr", "rtracklayer", "utils",
         "reshape2", "VennDiagram","gridExtra", "eulerr", "scales",
         "InteractionSet", "gplots", "locfdr", "future", "ggbeeswarm", "glossary")
bio_pkgs = c("biomaRt", "DESeq2", "ComplexHeatmap", "apeglm", "vsn",
             "rhdf5", "InteractionSet", "plotgardener", "HiCDCPlus",
             "GenomicInteractions", "LOLA", "AnnotationHub",
             "org.Mm.eg.db", "enrichplot", "DOSE", "clusterProfiler",
             "HiCExperiment", "HiCool", "HiContacts", "HiContactsData", "fourDNData", "DNAZooData",
             "MotifDb", "Biostrings")

# INSTALL PACKAGES
# install.packages(pkgs)
# BiocManager::install(bio_pkgs)

# INSTALL HiC related packages
# remotes::install_github("robinweide/GENOVA")
# devtools::install_github("thomasp85/scico")
# devtools::install_github("psyteachr/introdataviz")

# LOAD PACKAGES
lapply(pkgs, require, character.only = TRUE)
lapply(bio_pkgs, require, character.only = TRUE)
require(GENOVA)
require(scico)
require(introdataviz)

# CLEANING
rm(pkgs, bio_pkgs)

options(scipen=999)



```
## ENSEMBL
```{r}
# ENSEMBL DATABASE
# v102 is for the latest mm10 version
# ensembl.v102 <- useMart(host = "https://nov2020.archive.ensembl.org",
#                        biomart = "ENSEMBL_MART_ENSEMBL",
#                        dataset = "mmusculus_gene_ensembl")
```
## COLOR PALETTE
```{r}
palette_1 = list(red = "#E9002D",
                 amber = "#FFAA00",
                 green = "#00B000")
palette_2 = list(red = "#FF1F5B",
                 green = "#00CD6C",
                 blue = "#009ADE",
                 purple = "#AF58BA",
                 yellow = "#FFC61E",
                 orange = "#F28522",
                 grey = "#A0B1BA",
                 brown = "#A6761D")
palette_3 = list(grey1 = "#a0b1ba",
                 grey2 = "#c5d0d5",
                 grey3 = "#eceff1")

colorListLoop <-  c(palette_3[["grey3"]], palette_3[["grey2"]], palette_3[["grey1"]],
                     "#C5E1EF", "#6CB0D6", "#226E9C",
                     "#06592A", 
                     "#FED976", "#FD8D3C", "#E31A1C")
colorListPromoter <- c(palette_3[["grey3"]],"#E88587", "#E31A1C")
colorListEnhancer <- c(palette_3[["grey3"]],"#87AFC7", "#226E9C")
colorListStructure <- c(palette_3[["grey3"]], palette_3[["grey2"]], palette_3[["grey1"]])
```

## DIR LIST
```{r}
# 
# loopDir <- here("../..", "result", "loop", "chromo_detect")
# scoreDir <- here("../..", "result", "loop", "chromo_quantify")
# consensusDir <- here("../..", "result", "loop", "consensus")
# figDir <- here("../..", "figure", "loop")
# refDir <- here("../..", "reference")

```
## FIGURE PARAMETERS
```{r}
library(colorspace)

fontType <- "Helvetica"

fontSizeL <- 10 # pt
fontSizeM <- 8
fontSizeS <- 6

lineThick <- 0.75 # pt
lineMedium <- 0.5
lineThin <- 0.25

panelUnit <- 30 # mm
panelMargin <- 1.5

mmToInch <- 0.03937007874
mmToLineUnit <- 1/2.13
mmToLinePlotgarden <- 1/0.75
ptToMM <- 1/2.845


strong_red <- "#CB333A"
strong_blue <- "#4851A0"
weak_red <- lighten(strong_red, amount = 0.4)   # FF7D81
weak_blue <- lighten(strong_blue, amount = 0.4) # 8A91DD
no_grey <- "#A8A8A8"

strong_teel <- "#0892A5"
strong_green <- "#23CE6B" # A485
strong_darkgreen <- "#054A29"
strong_yellow <- "#FFBA49"
strong_orange <- "#F18F01" # dTAG
strong_lightpurple <- "#BD93D8"
strong_purple <- "#9E33CB" # Epi

panelSize <- function(num, unit = panelUnit, margin = panelMargin){
  return(num*unit - 2*margin)
}

```
## FUNCTIONS
```{r}
importBedpe = function(bedpe){
  a1 = makeGRangesFromDataFrame(data.frame(
    chr = bedpe$V1,
    start = bedpe$V2 +1,
    end = bedpe$V3))
  a2 = makeGRangesFromDataFrame(data.frame(
    chr = bedpe$V4,
    start = bedpe$V5 +1,
    end = bedpe$V6))
  GInteractions(a1, a2)
}

get_density <- function(x, y, ...) {
  dens <- MASS::kde2d(x, y, ...)
  ix <- findInterval(x, dens$x)
  iy <- findInterval(y, dens$y)
  ii <- cbind(ix, iy)
  return(dens$z[ii])
}
label_kb_mb <- function(x) {
  ifelse(x >= 1000000, paste0(x / 1000000, "Mb"), paste0(x / 1000, "kb"))
}

importPeak = function(fileName){
  df = fread(fileName)
  gr = makeGRangesFromDataFrame(data.frame(
    chr = df$V1, start = df$V2, end = df$V3
  ))
}
```



#### Comparing G1 vs Async for consensus loops
##### All loops
```{r}
diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))

temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
  dplyr::select(id, UT, AID)

score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
  dplyr::mutate(diff_G1 = dTAG - DMSO,
                  diff_async = AID - UT)


### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-1, 0.5) + ylim(-1, 0.5) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()


```
##### reg loops
```{r}
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
  dplyr::mutate(res = V3 - V2,
                id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id

diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))

temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
  dplyr::select(id, UT, AID)

score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
  dplyr::mutate(diff_G1 = dTAG - DMSO,
                  diff_async = AID - UT) %>%
  dplyr::filter(id %in% regID)


### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-1, 0.5) + ylim(-1, 0.5) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
    theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_reg.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_reg.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()


########## Saving Async and G1 specific regulatory loop
diffCutoff <- 0.2
temp <- score.tb %>% dplyr::filter(diff_G1 >= -diffCutoff, diff_async >= -diffCutoff) %>%
  dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothRetained.bedpe"), sep = "\t", col.names = FALSE)

temp <- score.tb %>% dplyr::filter(diff_G1 >= -diffCutoff, diff_async < -diffCutoff) %>%
  dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_AsyncSpecificPert.bedpe"), sep = "\t", col.names = FALSE)

temp <- score.tb %>% dplyr::filter(diff_G1 < -diffCutoff, diff_async >= -diffCutoff) %>%
  dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_G1SpecificPert.bedpe"), sep = "\t", col.names = FALSE)

temp <- score.tb %>% dplyr::filter(diff_G1 < -diffCutoff, diff_async < -diffCutoff) %>%
  dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothPert.bedpe"), sep = "\t", col.names = FALSE)
```

##### str loops
```{r}
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
  dplyr::mutate(res = V3 - V2,
                id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id

diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))

temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
  dplyr::select(id, UT, AID)

score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
  dplyr::mutate(diff_G1 = dTAG - DMSO,
                  diff_async = AID - UT) %>%
  dplyr::filter(id %in% regID)


### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
  geom_point() + 
  scale_color_viridis() +
  xlim(-1, 0.5) + ylim(-1, 0.5) +
  coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
  theme_classic() + ggtitle(paste0("Consensus loop score")) +
  annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")


png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_str.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_str.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3, 
                         p4, p5, p6, align = "h", ncol = 3))
dev.off()


```
#### Comparing loops called from Async
```{r}
# Importing loops. For make comparison easier, 25 kb
binSize = 25*1000
temp <- fread(here(refDir, "Tjian_chromosight_loop.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.async <- importBedpe(temp)


binSize = 25*1000
temp <- fread(here(consensusDir, "chromo_cons.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.G1 <- importBedpe(temp)


# Check overlap
overlap <- findOverlaps(loop.async, loop.G1)


n.async <- nrow(as_tibble(loop.async))
n.G1 <- nrow(as_tibble(loop.G1))
n.async.overlap <- length(unique(queryHits(overlap)))
n.G1.overlap <- length(unique(subjectHits(overlap)))

plot(euler(c("Async" = n.async - n.async.overlap,
             "Async&G1" = n.async.overlap,
             "G1" = 1)), quantities = TRUE)


plot(euler(c("Async" =1,
             "Async&G1" = n.G1.overlap,
             "G1" = n.G1 - n.G1.overlap)), quantities = TRUE)




# Importing loops. For make comparison easier, 25 kb
binSize = 25*1000
temp.pp <- fread(here(refDir, "Tjian_chromosight_loop_P-P.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp.pp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
binSize = 25*1000
temp.ep <- fread(here(refDir, "Tjian_chromosight_loop_E-P.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp.ep) <- c("V1", "V2", "V3", "V4", "V5", "V6")
temp <- bind_rows(temp.pp, temp.ep)
loop.async <- importBedpe(temp)


binSize = 25*1000
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
  dplyr::mutate(center1 = 0.5*(V2 + V3),
                center2 = 0.5*(V5 + V6),
                start1 = center1 - 0.5*binSize,
                end1 = center1 + 0.5*binSize,
                start2 = center2 - 0.5*binSize,
                end2 = center2 + 0.5*binSize) %>%
  dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.G1 <- importBedpe(temp)


# Check overlap
overlap <- findOverlaps(loop.async, loop.G1)


n.async <- nrow(as_tibble(loop.async))
n.G1 <- nrow(as_tibble(loop.G1))
n.async.overlap <- length(unique(queryHits(overlap)))
n.G1.overlap <- length(unique(subjectHits(overlap)))

plot(euler(c("Async" = n.async - n.async.overlap,
             "Async&G1" = n.async.overlap,
             "G1" = 1)), quantities = TRUE)


plot(euler(c("Async" =0.1,
             "Async&G1" = n.G1.overlap,
             "G1" = n.G1 - n.G1.overlap)), quantities = TRUE)

```


### [2.5] Consensus loop annotation
```{r}
#### Importing ChIP-exo peaks
refDir <- here("../..", "reference")
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K4me3 <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
peak.Whyte.SE <- importPeak(here(refDir, "superEnhancer_Whyte_ESC_mm10.bed"))
peak.Dylan.SE <- importPeak(here(refDir, "superEnhancer_Dylan_ESC.bed"))
```
#### Functions
```{r}
createLoopAnnotation <- function(bedpe.loop.anno, name, figDir, outDir, colorList){
  temp = bedpe.loop.anno %>%
    dplyr::mutate(sample = name)
  
  num = nrow(temp)
  p7 = ggplot(temp, aes(x = sample, fill = Anno2)) +
    geom_bar(color = "black") +
    theme_bw() +
    labs(title = paste0(num, " loops"),
         x = "", y = "Counts") +
    scale_y_continuous(labels = comma_format()) +
    theme(plot.title = element_text(hjust = 0.5),
          aspect.ratio = 5,
          legend.position = "right",
          legend.direction  = "vertical") +
    scale_fill_manual(values = colorList)
  
  width = 3
  height = 5
  svglite(here(figDir,
               paste0("loopClassify_", name, ".svg")),
          width = width, height = height)
  plot(p7)
  invisible(dev.off())
  png(here(figDir,
           paste0("loopClassify_", name, ".png")),
      width = width, height = height, res = 600, units = "in")
  plot(p7)
  invisible(dev.off())
}


annotateLoopRelaxedTSS <- function(bedpe.anno){
  temp = bedpe.anno %>% dplyr::rowwise() %>%
    dplyr::mutate(
      A1 = if_else((A1_H3K4me3TSS), "P", 
                   if_else((A1_H3K27ac), "E",
                           if_else((A1_CTCF|A1_RAD21), "S", "X"))),
      A2 = if_else((A2_H3K4me3TSS), "P", 
                   if_else((A2_H3K27ac), "E",
                           if_else((A2_CTCF|A2_RAD21), "S", "X")))
    )
  temp = temp %>% dplyr::rowwise() %>%
    dplyr::mutate(Anno = paste0(A1, "-", A2),
                  Anno2 = if_else(Anno == "E-P", "P-E", 
                                  if_else(Anno == "S-P", "P-S",
                                          if_else(Anno == "X-P", "P-X",
                                                  if_else(Anno == "S-E", "E-S",
                                                          if_else(Anno == "X-E", "E-X",
                                                                  if_else(Anno == "X-S", "S-X",
                                                                          Anno)))))),
    )
  
  temp$Anno2 = factor(temp$Anno2, level = c("X-X",
                                            "S-X", "S-S",
                                            "E-X","E-S","E-E",
                                            "P-E","P-X", "P-S", "P-P"))
  
  # Checking the precense of super enhancer
  temp <- temp %>% dplyr::rowwise() %>%
    dplyr::mutate(AnnoSE = ifelse(A1_Whyte.SE | A2_Whyte.SE, "SE", "NO"))
  temp$AnnoSE <- factor(temp$AnnoSE, level = c("SE", "NO"))
  
  return(temp)
}

annotateLoopPromoterTSS <- function(bedpe.anno){
  temp = bedpe.anno %>% dplyr::rowwise() %>%
    dplyr::mutate(
      A1 = if_else((A1_H3K4me3TSS), "P", "N"),
      A2 = if_else((A2_H3K4me3TSS), "P", "N")
    )
  temp = temp %>% dplyr::rowwise() %>%
    dplyr::mutate(Anno = paste0(A1, "-", A2),
                  Anno2 = if_else(Anno == "N-P", "P-N", Anno))
  
  temp$Anno2 = factor(temp$Anno2, level = c("N-N", "P-N", "P-P"))
  
  return(temp)
}

annotateLoopEnhancer <- function(bedpe.anno){
  temp = bedpe.anno %>% dplyr::rowwise() %>%
    dplyr::mutate(
      A1 = if_else((A1_H3K27ac), "E", "N"),
      A2 = if_else((A2_H3K27ac), "E", "N")
    )
  temp = temp %>% dplyr::rowwise() %>%
    dplyr::mutate(Anno = paste0(A1, "-", A2),
                  Anno2 = if_else(Anno == "N-E", "E-N", Anno))
  temp$Anno2 = factor(temp$Anno2, level = c("N-N", "E-N", "E-E"))
  
  return(temp)
}

annotateLoopStructure <- function(bedpe.anno){
  temp = bedpe.anno %>% dplyr::rowwise() %>%
    dplyr::mutate(
      A1 = if_else((A1_CTCF|A1_RAD21), "S", "N"),
      A2 = if_else((A2_CTCF|A2_RAD21), "S", "N")
    )
  temp = temp %>% dplyr::rowwise() %>%
    dplyr::mutate(Anno = paste0(A1, "-", A2),
                  Anno2 = if_else(Anno == "N-S", "S-N", Anno))
  temp$Anno2 = factor(temp$Anno2, level = c("N-N", "S-N", "S-S"))
  return(temp)
}


annotateAnchorTSS <- function(bedpe){
  tb.loop = 
    setOverlapColumn("Whyte.SE",
                     setOverlapColumn("CTCF",
                                      setOverlapColumn("RAD21",
                                                       setOverlapColumn("H3K27ac",
                                                                        setOverlapColumn("H3K4me3TSS", bedpe)))))
  return(tb.loop)
}

setOverlapColumn <- function(peakName, loop){
  tb.loop = as_tibble(loop)
  overlap = returnOverlapIndexLixt(get(paste0("peak.", peakName)), tb.loop)
  tb.loop[[paste0("A1_", peakName)]] = FALSE
  tb.loop[[paste0("A1_", peakName)]][overlap[[1]]] = TRUE
  tb.loop[[paste0("A2_", peakName)]] = FALSE
  tb.loop[[paste0("A2_", peakName)]][overlap[[2]]] = TRUE
  return(tb.loop)
}

returnOverlapIndexLixt <- function(peak, loop){
  anchor1.tb = as_tibble(loop) %>% dplyr::select(chrom1, start1, end1)
  anchor1 = makeGRangesFromDataFrame(data.frame(
    chr = anchor1.tb$chrom1,
    start = anchor1.tb$start1,
    end = anchor1.tb$end1
  ))
  
  anchor2.tb = as_tibble(loop) %>% dplyr::select(chrom2, start2, end2)
  anchor2 = makeGRangesFromDataFrame(data.frame(
    chr = anchor2.tb$chrom2,
    start = anchor2.tb$start2,
    end = anchor2.tb$end2
  ))
  
  overlap = list(overlap1 = unique(queryHits(findOverlaps(anchor1, peak))),
                 overlap2 = unique(queryHits(findOverlaps(anchor2, peak))))
  return(overlap)
}

saveAnnoGroupBedpe <- function(temp, anno.list, name, annoName, ouDir){
  loop = temp %>% dplyr::filter(Anno2 %in% anno.list) %>%
    dplyr::select(c("chrom1", "start1", "end1", "chrom2", "start2", "end2"))
  fwrite(loop, here(outDir, paste0(name, "_", annoName, ".bedpe")), sep = "\t", col.names = FALSE)
}
```
#### Annotation
```{r}
consensus.loop.tb <- fread(here(consensusDir, "chromo_cons_score.tsv"))

################################################################################
# Filtering H3K4me3 peaks that has TSS nearby
# Since the finest resolution is 5kb, +-2.5kb will be used as a cutoff for checking TSS presence
flankSize <- 2500
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend)
colnames(gene.tb) <- c("chr", "start", "end")
TSS1kb.gr <- makeGRangesFromDataFrame(gene.tb)
temp <- peak.H3K4me3[unique(queryHits(findOverlaps(peak.H3K4me3, TSS1kb.gr)))]
fwrite(as_tibble(temp), here(refDir, 
                             paste0("33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.", 
                                    flankSize/1000, "kbTSS.bed")), sep = "\t", col.names = FALSE)
peak.H3K4me3TSS <- importPeak(here(refDir, paste0("33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.", 
                                    flankSize/1000, "kbTSS.bed")))
################################################################################
# Annotating with strict priority (P-TSS > E > S)
temp.anno.TSS <- annotateAnchorTSS(consensus.loop.tb) %>%
  dplyr::mutate(
    diff_dTAG_DMSO = (dTAG-DMSO),
    diff_A485_DMSO = (A485-DMSO))

name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- annotateLoopRelaxedTSS(temp.anno.TSS)
fwrite(consensus.loop.anno.tb, here(consensusDir, paste0(name, ".tsv")), 
       sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.tb, name, figDir, outDir,  colorListLoop)
saveAnnoGroupBedpe(consensus.loop.anno.tb, unique(consensus.loop.anno.tb$Anno2) , name, "all", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"), name, "regulatory", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("S-S", "S-X"), name, "structure", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("X-X"), name, "x-x", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "P-S", "P-X"), name, "p-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "E-E"), name, "pe-pe", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E"), name, "p-pe", outDir)

saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P"), name, "p-p", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-E"), name, "p-e", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-S"), name, "p-s", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-X"), name, "p-x", outDir)

################################################################################
# Annotating with one marker (Promoter)
name <- "chromo_cons_annoPromoter"
consensus.loop.anno.promoter.tb <- annotateLoopPromoterTSS(temp.anno.TSS)
fwrite(consensus.loop.anno.promoter.tb, here(consensusDir, paste0(name, ".tsv")), 
       sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.promoter.tb, name, 
                     figDir, outDir,  colorListPromoter)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, unique(consensus.loop.anno.promoter.tb$Anno2) , name, "all", outDir)  
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("P-P"), name, "p-p", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("P-N"), name, "p-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("N-N"), name, "n-n", outDir)

################################################################################
# Annotating with one marker (Enhancer)
name <- "chromo_cons_annoEnhancer"
consensus.loop.anno.enhancer.tb <- annotateLoopEnhancer(temp.anno.TSS)
fwrite(consensus.loop.anno.enhancer.tb, here(consensusDir, paste0(name, ".tsv")), 
       sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.enhancer.tb, name, 
                     figDir, outDir,  colorListEnhancer)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, unique(consensus.loop.anno.enhancer.tb$Anno2) , name, "all", outDir)  
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("E-E"), name, "e-e", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("E-N"), name, "e-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("N-N"), name, "n-n", outDir)
################################################################################
# Annotating with one marker (Structure)
name <- "chromo_cons_annoStructure"
consensus.loop.anno.structure.tb <- annotateLoopStructure(temp.anno.TSS)
fwrite(consensus.loop.anno.structure.tb, here(consensusDir, paste0(name, ".tsv")), 
       sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.structure.tb, name, 
                     figDir, outDir,  colorListStructure)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, unique(consensus.loop.anno.structure.tb$Anno2) , name, "all", outDir)  
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("S-S"), name, "s-s", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("S-N"), name, "s-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("N-N"), name, "n-n", outDir)
```

##### Pure-reg vs Str-reg
```{r}
################################################################################
# 2024.09.10 Splitting regulatory loop into pure regulatory and structure-related loops
name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- fread(here(consensusDir, paste0(name, ".tsv")))
regulatory.loop.anno.tb <- consensus.loop.anno.tb %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))

regulatory.loop.anno.tb <- regulatory.loop.anno.tb %>% rowwise() %>%
  dplyr::mutate(
    Anno3 = ifelse(A1_RAD21 | A1_CTCF | A2_RAD21 | A2_RAD21, paste0("str"),
                   paste0("reg"))
  )


fwrite(regulatory.loop.anno.tb, here(consensusDir, paste0(name, "_Anno3.tsv")), 
       sep = "\t", col.names = TRUE)

regulatory.loop.anno.tb_reg <- regulatory.loop.anno.tb %>% dplyr::filter(Anno3 == "reg")
regulatory.loop.anno.tb_str <- regulatory.loop.anno.tb %>% dplyr::filter(Anno3 == "str")

saveAnnoGroupBedpe(regulatory.loop.anno.tb_reg, c("P-P", "P-E", "E-E"), name, "pe-pe_reg", outDir)
saveAnnoGroupBedpe(regulatory.loop.anno.tb_str, c("P-P", "P-E", "E-E"), name, "pe-pe_str", outDir)

```


### [2.6] Comparing across samples for conesnsus loop
#### Functions
```{r}
create_loop_dis_vs_score <- function(data, figDir, name, Anno2List){
  data = data %>% dplyr::filter(Anno2 %in% Anno2List)
  
  ### barplot
  temp <- data %>% dplyr::select(id, DMSO, dTAG, A485) %>%
    pivot_longer(!id, names_to = "treatment", values_to = "score")
  temp$treatment <- factor(temp$treatment, levels = c("DMSO", "dTAG", "A485"))
  
  p3 <- ggplot(temp, aes(x = treatment, y = score)) +
    geom_violin(aes(fill = treatment), show.legend = FALSE) +
    scale_fill_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    ylim(-0.5, 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
    theme_classic() +
    ggtitle(name) +
    theme(plot.title = element_text(size = 4))
  fileName <- paste0("score_barplot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 2, height = 4)
  print(p3)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 2, height = 4)
  print(p3)
  dev.off()
  
  ### Distance vs score
  
  temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, dTAG, A485)

  avg_scores <- temp %>%
    group_by(distance) %>%
    summarise(across(starts_with("DMSO"):starts_with("A485"), mean, na.rm = TRUE))

  avg_scores_long <- avg_scores %>%
    pivot_longer(cols = DMSO:A485, names_to = "condition", values_to = "avg_score")
  avg_scores_long$condition <- factor(avg_scores_long$condition, levels = c("DMSO", "dTAG", "A485"))
  # Create the plot
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
    geom_smooth(show.legend = TRUE) + ylim(0, 0.5) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
    scale_fill_manual(values = c("DMSO" = "grey80", "dTAG" = "pink", "A485" = "skyblue")) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Score") +
    theme(plot.title = element_text(size = 5))
  fileName <- paste0("dist_vs_score_linePlot_", name)
  png(here(figDir, paste0(fileName, ".png")),
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),
      width = 4, height = 3)
  print(p4)
  dev.off()
}

create_loop_dis_vs_diffscore <- function(data, figDir, name, Anno2List){
  data = data %>% dplyr::filter(Anno2 %in% Anno2List)
  
  
  temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, diff_A485_DMSO)
  
  avg_scores <- temp %>%
    group_by(distance) %>%
    summarise(across(1:2, mean, na.rm = TRUE))
  
  avg_scores_long <- avg_scores %>%
    pivot_longer(cols = 2:3, names_to = "condition", values_to = "avg_score")
  avg_scores_long$condition <- factor(avg_scores_long$condition, 
                                      levels = c("diff_dTAG_DMSO", "diff_A485_DMSO"))
  # Create the plot
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
    geom_smooth(show.legend = TRUE) + geom_hline(yintercept = 0) + ylim(-0.5, 0.1) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = c("diff_dTAG_DMSO" = "pink", "diff_A485_DMSO" = "skyblue")) +
    scale_fill_manual(values = c("diff_dTAG_DMSO" = "pink", "diff_A485_DMSO" = "skyblue")) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Diff Score") +
    theme(plot.title = element_text(size = 5))
  fileName <- paste0("dist_vs_score_difflinePlot_", name)
  png(here(figDir, paste0(fileName, ".png")),
      res = 600, units = "in", width = 4.5, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),
          width = 4.5, height = 3)
  print(p4)
  dev.off()
}

create_loop_scatterplot <- function(data, figDir, name, Anno2List, diffCutoff){
  data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% Anno2List)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
  num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
  num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
  num.all <- num.up + num.no + num.down
  perc.up <- round(num.up / num.all * 100, 2)
  perc.no <- round(num.no / num.all * 100, 2)
  perc.down <- round(num.down / num.all * 100, 2)
  
  ### Scatterplot
  data$density <- get_density(data$DMSO, data$dTAG, n = 100)
  data <- data %>% dplyr::arrange(density)
  correlation <- cor(data$DMSO, data$dTAG)
  p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
    geom_point(size = 1,
      alpha = 1,
      stroke = 0) + 
    scale_color_viridis(option = "D", guide = guide_colorbar(
                          barwidth = 1.5/5.08,  # Adjust width of the color bar
                          barheight = 15/5.08   # Adjust height of the color bar
                        )) +
    xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
             color = "black", hjust = 0, , size = 1,
        family = fontType) +
    annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
             color = "black", hjust = 0, , size = 1,
        family = fontType) +
    annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
             color = "black", hjust = 0, , size = 1,
        family = fontType) +
    theme_classic() +
    #annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black") +
    theme(
      # legend.position = "none",
      plot.title = element_text(
        hjust = 0.5,
        size = fontSizeS,
        family = fontType
      ),
      axis.title = element_text(
        size = fontSizeS,
        family = fontType,
        color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS,
        family = fontType,
        color = "#000000"
      ),
      axis.line = element_line(
        color = "#000000",
        size = lineThick*mmToLineUnit,
        lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000",
        size = lineThick*mmToLineUnit,
        lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
    ) +
  xlab("Loop score\nG1.DMSO") +
    ylab("Loop score\nG1.dTAG")
  
  
  
  num.up <- (summary(data$updown_A485_DMSO))["UP"]
  num.no <- (summary(data$updown_A485_DMSO))["NO"]
  num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
  num.all <- num.up + num.no + num.down
  perc.up <- round(num.up / num.all * 100, 2)
  perc.no <- round(num.no / num.all * 100, 2)
  perc.down <- round(num.down / num.all * 100, 2)
  
  data$density <- get_density(data$DMSO, data$A485, n = 100)
  data <- data %>% dplyr::arrange(density)
  correlation <- cor(data$DMSO, data$A485)
  p2 <- ggplot(data, aes(x = DMSO, y = A485, color = density)) +
    geom_point(size = 1,
      alpha = 1,
      stroke = 0) + 
    scale_color_viridis(option = "D", guide = guide_colorbar(
                          barwidth = 1.5/5.08,  # Adjust width of the color bar
                          barheight = 15/5.08   # Adjust height of the color bar
                        )) +
    xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey",
      color = "black",
      size = lineThick*mmToLineUnit,
      lineend = "square") +
    annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
             color = "black", hjust = 0, size = 1,
        family = fontType) +
    annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
             color = "black", hjust = 0, size = 1,
        family = fontType) +
    annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
             color = "black", hjust = 0, size = 1,
        family = fontType) +
    theme_classic() + 
    # annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black") +
    theme(
      # legend.position = "none",
      plot.title = element_text(
        hjust = 0.5,
        size = fontSizeS,
        family = fontType
      ),
      axis.title = element_text(
        size = fontSizeS,
        family = fontType,
        color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS,
        family = fontType,
        color = "#000000"
      ),
      axis.line = element_line(
        color = "#000000",
        size = lineThick*mmToLineUnit,
        lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000",
        size = lineThick*mmToLineUnit,
        lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
    ) +
    xlab("Loop score\nG1.DMSO") +
    ylab("Loop score\nG1.A485")

    width <- panelSize(1.5)*mmToInch
  height <- panelSize(1.5)*mmToInch  
  
  fileName <- here(figDir,paste0("scatterplot_", name, "_dTAG_vs_DMSO_", diffCutoff))

  svglite(paste0(fileName, ".svg"), width = width, height =height)
  print(p1)
  dev.off()
  png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
  print(p1)
  dev.off()
  
    fileName <- here(figDir, paste0("scatterplot_", name, "_A485_vs_DMSO_", diffCutoff))

  svglite(paste0(fileName, ".svg"), width = width, height =height)
  print(p2)
  dev.off()
  png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
  print(p2)
  dev.off()
  
  # 
  # 
  # 
  # png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
  # print(cowplot::plot_grid(p1, p2, align = "h"))
  # dev.off()
  # 
  # svglite(here(figDir, paste0(fileName, ".svg")), 
  #         width = 5*1.5, height = 2.5*1.5)
  # print(cowplot::plot_grid(p1, p2, align = "h"))
  # dev.off()
}

make_diff_bedpe <- function(data, name, Anno2List, outDir, diffCutoff){
  data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% Anno2List)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_UP_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_NO_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_DOWN_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_UP_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_NO_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_DOWN_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
}

create_dist_vs_avgScore_perTreatment <- function(data, figDir, name, loopList, colorList, se = FALSE){
  avg_scores_long <- data %>%
    group_by(distance, Anno2) %>%
    summarise(avg_score = mean(score, na.rm = TRUE)) %>%
    ungroup() 
  avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) + 
    geom_smooth(show.legend = TRUE, se = se)  +
    ylim(0, 0.5) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = colorList) +
    scale_fill_manual(values = colorList) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Score") +
    theme(plot.title = element_text(size = 8))
  
  fileName <- paste0("dist_vs_score_linePlot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 4, height = 3)
  print(p4)
  dev.off()
} 

create_dist_vs_avgDiffScore_perTreatment <- function(data, figDir, name, loopList, colorList, se = FALSE){
  avg_scores_long <- data %>%
    group_by(distance, Anno2) %>%
    summarise(avg_score = mean(score, na.rm = TRUE)) %>%
    ungroup() 
  avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) + 
    geom_hline(yintercept = 0) +
    geom_smooth(show.legend = TRUE, se = se)  +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = colorList) +
    scale_fill_manual(values = colorList) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Diff Score") +
    theme(plot.title = element_text(size = 8)) 
  
  fileName <- paste0("dist_vs_score_difflinePlot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 4, height = 3)
  print(p4)
  dev.off()
} 

create_score_barplot_perTreatment <- function(data, figDir, name, loopList, colorList){
  data$Anno2 <- factor(data$Anno2, levels = loopList)
  
  p3 <- ggplot(data, aes(x = Anno2, y = score)) +
    geom_violin(aes(fill = Anno2), show.legend = FALSE) +
    scale_fill_manual(values = colorList) +
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    ylim(-0.5, 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
    theme_classic() +
    ggtitle(name) +
    theme(plot.title = element_text(size = 4))
  
  fileName <- paste0("score_barPlot_", name)
  
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 2, height = 4)
  print(p3)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 2, height = 4)
  print(p3)
}


iterate_loop_functions <- function(data, figDir, groupName, annoList){
  # create_loop_dis_vs_score(data, figDir, groupName, annoList)
  # create_loop_dis_vs_diffscore(data, figDir, groupName, annoList)

  create_loop_scatterplot(data, figDir, groupName, annoList, 0.2)
  # create_loop_scatterplot(data, figDir, groupName, annoList, 0.1)
  # make_diff_bedpe(data, groupName, annoList, consensusDir, 0.2)
  # make_diff_bedpe(data, groupName, annoList, consensusDir, 0.1)
}
```

#### Relaxed annotation
```{r}

name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

iterate_loop_functions(data, figDir, paste0(name, "_", "cl_all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_structure"), 
                       c("S-S", "S-X"))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-s"), 
                       c("S-S"))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_regulatory"), 
                       c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_pe-pe"), 
                       c("P-P", "P-E", "E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-pe"), 
                       c("P-P", "P-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-p"), 
                       c("P-P"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-e"), 
                       c("P-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-e"), 
                       c("E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "x-x"), 
                       c("X-X"))
###########
# Creating differential scatterplot
View(data)



#######################
# Creating figures per each condition
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)

#######################
# Creating figures per each condition, differential
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG_SE"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop, se = TRUE)
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop)

create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485_SE"), c("X-X",
                                                                 "S-X", "S-S",
                                                                 "E-X","E-S","E-E",
                                                                 "P-E","P-X", "P-S", "P-P"), colorListLoop, se = TRUE)
```
##### Pure-reg vs str-reg
```{r}
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, "_Anno3.tsv")))
diffCutoff <- 0.2

# Splitting data
data.reg <- data %>% dplyr::filter(Anno3 == "reg")
data.str <- data %>% dplyr::filter(Anno3 == "str")

# Taking codes from previous function (str)
data <- data.str
data <- data %>%
  dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))

num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)

data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
           color = "black", hjust = 0, size = 3) +
  theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))

# Taking codes from previous function (reg)
data <- data.reg
data <- data %>%
  dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))

num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)

data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p2 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
           color = "black", hjust = 0, size = 3) +
  theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))  
  

fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_pure_str_reg")
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()

svglite(here(figDir, paste0(fileName, ".svg")), 
        width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
```
##### 50kb vs 200 kb
```{r}
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, "_Anno3.tsv")))

data <- data %>% dplyr::mutate(size = start2 - start1)

data_under50kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size < 50*1000, Anno3 == "reg")
data_under200kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size >= 50*1000, size < 200*1000, Anno3 == "reg")
data_over200kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size >= 200*1000, Anno3 == "reg")

########

data <- data_under50kb

diffCutoff <- 0.2
data <- data %>%
  dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))

####
num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)

data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
           color = "black", hjust = 0, size = 3) +
  theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))  
  

####
num.up <- (summary(data$updown_A485_DMSO))["UP"]
num.no <- (summary(data$updown_A485_DMSO))["NO"]
num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)

data$density <- get_density(data$DMSO, data$A485, n = 100)
data <- data %>% dplyr::arrange(density)
p2 <- ggplot(data, aes(x = DMSO, y = A485, color = density)) +
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() +
  xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
  geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
           color = "black", hjust = 0, size = 3) +
  annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
           color = "black", hjust = 0, size = 3) +
  theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))  
  

fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_regunder50kb_pure")
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()

svglite(here(figDir, paste0(fileName, ".svg")), 
        width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()

```

#### One feature annotation - promoter
```{r}

name <- "chromo_cons_annoPromoter"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-p"), c("P-P"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-n"), c("P-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))

colorList <- colorListPromoter
loopList <- c("N-N", "P-N", "P-P")

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)

create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                      loopList, colorList)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)
```

#### One feature annotation - enhancer
```{r}
name <- "chromo_cons_annoEnhancer"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-e"), c("E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-n"), c("E-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))

colorList <- colorListEnhancer
loopList <- c("N-N", "E-N", "E-E")

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                      loopList, colorList)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)
```
#### One feature annotation - structure

```{r}
name <- "chromo_cons_annoStructure"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-s"), c("S-S"))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-n"), c("S-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))

colorList <- colorListStructure
loopList <- c("N-N", "S-N", "S-S")

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                      loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                      loopList, colorList)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), 
                                     loopList, colorList, se = TRUE)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), 
                                     loopList, colorList, se = TRUE)

```

### [2.7] Distance vs loop # and score
```{r}
data <- fread(here(consensusDir, "chromo_cons_score.tsv")) %>%
  dplyr::mutate(distance = start2 - start1,
                res = end1 - start1)

p1 <- ggplot(data, aes(x = distance, fill = factor(res))) +
  geom_histogram(binwidth = 50000, alpha = 1) +
  labs(title = "Distribution of Distance by Resolution",
       x = "Distance",
       y = "Counts") + 
  facet_wrap(~ res, ncol = 1, scales = "free") + 
  scale_x_continuous(labels = label_kb_mb, limits = c(0, 5e6)) + 
  theme_classic() +
  theme(legend.position = "none",
        plot.title = element_text(size = 8))  # Removes the legend

png(here(figDir, paste0("consensus_dist_per_res.png")), 
    res = 600, units = "in", width = 3, height = 6)
print(p1)
dev.off()

p2 <- ggplot(data, aes(x = distance, fill = factor(res))) +
  geom_histogram(binwidth = 50000, alpha = 1) +
  labs(title = "Distribution of Distance by Resolution",
       x = "Distance",
       y = "Counts") + 
  facet_wrap(~ res, ncol = 1, scales = "free") + 
  scale_x_continuous(labels = label_kb_mb, limits = c(0, 2e6)) + 
  theme_classic() +
  theme(legend.position = "none",
        plot.title = element_text(size = 8))  # Removes the legend


png(here(figDir, paste0("consensus_dist_per_res_2mb.png")), 
    res = 600, units = "in", width = 3, height = 6)
print(p2)
dev.off()


```


```{r}
### Distance vs score
  
  temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, dTAG, A485)
  
  avg_scores <- temp %>%
    group_by(distance) %>%
    summarise(across(starts_with("DMSO"):starts_with("A485"), mean, na.rm = TRUE))
  
  avg_scores_long <- avg_scores %>%
    pivot_longer(cols = DMSO:A485, names_to = "condition", values_to = "avg_score")
  avg_scores_long$condition <- factor(avg_scores_long$condition, levels = c("DMSO", "dTAG", "A485"))
  # Create the plot
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
    geom_smooth(show.legend = TRUE) + ylim(0, 0.5) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
    scale_fill_manual(values = c("DMSO" = "grey80", "dTAG" = "pink", "A485" = "skyblue")) +
    labs(title = paste0("Distance vs. Average Score, ", note),
         x = "Distance",
         y = "Average Score") +
    theme(plot.title = element_text(size = 12))
  png(here(figDir, paste0("consensus_dist_vs_avgScore_allRes_pu100pz100_", note, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()

```

### [2.8] Annotating genes to P-P and P-E loops
#### Link gene to promoter anchor
Here, anchor was linked to a gene based on whether the anchor overlaps with TSS +- 2.5kb region
```{r}
# Annotating genes based on promoter anchor
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
```


```{r}
flankSize <- 2500
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V4, V5, V6)
colnames(gene.TSS.tb) <- c("chr", "start", "end", "strand", "gene", "ensembl")

fwrite(gene.TSS.tb, here(refDir, "mm10_GRCm38.p6_TSS2.5kb.bed"), sep = "\t", col.names = FALSE, row.names = FALSE)

findOverlapGene <- function(gene.TSS.tb, chrom1, start1, end1){
  temp <- gene.TSS.tb %>% dplyr::filter(chr == chrom1) %>%
    dplyr::filter((start <= end1) & (end >= start1))
  return(temp$gene)
}
findOverlapEnsembl <- function(gene.TSS.tb, chrom1, start1, end1){
  temp <- gene.TSS.tb %>% dplyr::filter(chr == chrom1) %>%
    dplyr::filter((start <= end1) & (end >= start1))
  return(temp$ensembl)
}

temp <- data %>% rowwise() %>%
  dplyr::mutate(A1_gene = ifelse(A1 == "P",
                                 list(findOverlapGene(gene.TSS.tb, chrom1, start1, end1)),
                                 NA),
                A2_gene = ifelse(A2 == "P",
                                 list(findOverlapGene(gene.TSS.tb, chrom2, start2, end2)),
                                 NA),
                gene = list(unique(c(A1_gene, A2_gene))))

temp_p_n <- temp %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "P-S", "P-X")) %>% dplyr::select(-c("A1_gene", "A2_gene"))
fwrite(temp_p_n, here(consensusDir, paste0(name, "_p-n_geneList.tsv")), sep = "\t")

temp_p_pe <- temp %>% dplyr::filter(Anno2 %in% c("P-P", "P-E")) %>% dplyr::select(-c("A1_gene", "A2_gene"))
fwrite(temp_p_pe, here(consensusDir, paste0(name, "_p-pe_geneList.tsv")), sep = "\t")

temp <- data %>% rowwise() %>%
  dplyr::mutate(A1_gene = ifelse(A1 == "P",
                                 list(findOverlapEnsembl(gene.TSS.tb, chrom1, start1, end1)),
                                 NA),
                A2_gene = ifelse(A2 == "P",
                                 list(findOverlapEnsembl(gene.TSS.tb, chrom2, start2, end2)),
                                 NA),
                gene = list(unique(c(A1_gene, A2_gene))))

temp_p_n <- temp %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "P-S", "P-X")) %>% dplyr::select(-c("A1_gene", "A2_gene"))
fwrite(temp_p_n, here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")), sep = "\t")

temp_p_pe <- temp %>% dplyr::filter(Anno2 %in% c("P-P", "P-E")) %>% dplyr::select(-c("A1_gene", "A2_gene"))
fwrite(temp_p_pe, here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")), sep = "\t")

```
### [2.9] Size distribution of diff loops

```{r}
create_dist_barplot <- function(data, figDir, name, note, loopList, diffCutoff){
  temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
    dplyr::filter(Anno2 %in% loopList,
                  updown_dTAG_DMSO %in% c("UP", "NO", "DOWN"))
  p <- ggplot(temp, aes(x = updown_dTAG_DMSO, y = distance)) +
    geom_violin(aes(fill = updown_dTAG_DMSO)) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = updown_dTAG_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + ggtitle(note) +
    scale_y_continuous(labels = label_kb_mb)
  fileName <- paste0("size_barplot_", name, "_dTAG_vs_DMSO_", note, "_", diffCutoff)
  height <- 3
  width <- 4
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
  
  
  temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
    dplyr::filter(Anno2 %in% loopList,
                  updown_A485_DMSO %in% c("UP", "NO",  "DOWN"))
  p <- ggplot(temp, aes(x = updown_A485_DMSO, y = distance)) +
    geom_violin(aes(fill = updown_A485_DMSO)) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = updown_A485_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + ggtitle(note) +
    scale_y_continuous(labels = label_kb_mb)
  fileName <- paste0("size_barplot_", name, "_A485_vs_DMSO_", note, "_", diffCutoff)
  height <- 3
  width <- 4
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}


name
diffCutoff <- 0.2
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
  dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                          ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")),
                distance = start2 - start1)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
create_dist_barplot(data, figDir, name, "p-n", c("P-P", "P-E", "P-S", "P-X"), 0.2)
create_dist_barplot(data, figDir, name, "p-pe", c("P-P", "P-E"), 0.2)
create_dist_barplot(data, figDir, name, "pe-pe", c("P-P", "P-E", "E-E"), 0.2)
create_dist_barplot(data, figDir, name, "str", c("S-S", "S-X"), 0.2)

```
### [2.10] Linking with RNA-seq/PRO-seq
#### Function & parameter
```{r}
GOdir <- here("../..", "result", "loop", "GO")
dir.create(GOdir, showWarnings = FALSE, recursive = TRUE)

getGO <- function(name, figDir, geneList, categoryNum = 15, height = 10, width = 7){
  GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
  GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
  fwrite(as.data.frame(GO), here(GOdir, paste0("GO_", name, "_ensembl.tsv")), sep = "\t")
  fwrite(as.data.frame(GO.readable), here(GOdir, paste0("GO_", name, "_readable.tsv")), sep = "\t")
  
  if(nrow(as.data.frame(GO)) != 0){
    #####
    fileName <- paste0("GO_", name)
    svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
    print(dotplot(GO, showCategory = categoryNum, title = name) + 
            scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
    dev.off()
    png(here(figDir, paste0(fileName, ".png")), height = height, width = width, res = 600, unit = "in")
    print(dotplot(GO, showCategory = categoryNum, title = name) + 
            scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
    dev.off()
  } 
}

convPvalue <- function(pvalue){
  out <- ifelse(pvalue < 0.0001, "****",
                ifelse(pvalue < 0.001, "***",
                       ifelse(pvalue < 0.01, "**",
                              ifelse(pvalue < 0.05, "*", "ns"))))
  return(out)
}

loadLoopAnnoData <- function(fileName, diffCutoff = 0.2, annoList = c("P-P", "P-E", "P-S", "P-X")){
  data <- fread(fileName) %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")),
                  gene = strsplit(gene, '\\|')) %>%
    dplyr::filter(Anno2 %in%  annoList)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  return(data)
}
```

#### dTAG Scatterplot
```{r}
loopType <- "p-n"

# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
  dplyr::mutate(distance = start2 - start1)
# geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
#                                       diffCutoff = diffCutoff,
#                                       annoList = c("P-P"))%>% 
#   dplyr::mutate(distance = start2 - start1)
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

# 
# p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
#                       label = ifelse(flag != "0NO", external_gene_name, NA),
#                       shape = as.factor(maxFlag))) +
#   geom_point() + geom_text_repel() + theme_classic() + 
#   ggtitle("dTAG") +
#   geom_hline(yintercept = 0) +
#   geom_hline(yintercept = diffCutoff, linetype = "dashed") +
#   geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
#   geom_vline(xintercept = 0) +
#   geom_vline(xintercept = fcCutoff, linetype = "dashed") +
#   geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
#   scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
#   scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
# 
# fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff)
# height <- 4
# width <- 7
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

# fwrite(temp, here(consensusDir, paste0("gene_avgScore_fc_dTAG_", loopType, "_noFCcutoff.tsv")), sep = "\t")

p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point(size = 1, alpha = 1,
      stroke = 0) + geom_text_repel() + theme_classic() + 
  guides(color = "none", shape = "none") + 
  geom_hline(yintercept = 0, alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  geom_hline(yintercept = c(diffCutoff, -diffCutoff), linetype = "dashed", alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  geom_vline(xintercept = 0, alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  geom_vline(xintercept = c(fcCutoff, -fcCutoff), linetype = "dashed", alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  theme(
    axis.title.x = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) + labs(y = "Avg. Δ loop score", x = "log2(shrunken FC)") +
  scale_color_manual(values = c("0NO" = "#A9A8A9", "1UP" = "#CB333A", "2DOWN" = "#4852A0")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_noFCcutoff")
height <- 42*mmToInch
width <- 50*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - up and down
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.updown.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_noFCcutoffupdown")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# 
# ## RNA + PRO-seq
# temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>% 
#   unnest(gene) %>% group_by(gene) %>%
#   summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')
# 
# temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
#   dplyr::mutate(flag = ifelse(gene %in% geneList.down, "2DOWN",
#                               ifelse(gene %in% geneList.up, "1UP", "0NO")),
#                 maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
#                 shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
#   dplyr::arrange(flag)
# 
# fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_", loopType, ".tsv"), sep = "\t")
# 
# p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
#                       label = ifelse(flag != "0NO", external_gene_name, NA),
#                       shape = as.factor(maxFlag))) +
#   geom_point() + geom_text_repel() + theme_classic() + 
#   ggtitle("dTAG") +
#   geom_hline(yintercept = 0) +
#   geom_hline(yintercept = diffCutoff, linetype = "dashed") +
#   geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
#   geom_vline(xintercept = 0) +
#   geom_vline(xintercept = fcCutoff, linetype = "dashed") +
#   geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
#   scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
#   scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
# 
# fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_RNA-PRO")
# height <- 4
# width <- 7
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()
```


#### dTAG Scatterplot OE
##### Avg
```{r}

loopType <- "p-pe"

# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_", loopType, "_avgLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_", loopType, "_avgLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```
##### Sum
```{r}
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = sum(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_sumLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = sum(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_sumLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### abs max
```{r}
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO[which.max(abs(log_obsexp_diff_dTAG_DMSO))], .groups = 'drop')


temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_absLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO[which.max(abs(log_obsexp_diff_dTAG_DMSO))], .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_absLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```
##### Closest P-N loop
```{r}
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>% 
  unnest(gene) %>% group_by(gene) %>% 
  slice_min(distance, with_ties = FALSE) %>% 
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_closestLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>% 
  unnest(gene) %>% group_by(gene) %>%
  slice_min(distance, with_ties = FALSE) %>% 
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_closestLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### Farthest P-N loop
```{r}
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)


# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
                          (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id

geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
                        (diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2

temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>% 
  unnest(gene) %>% group_by(gene) %>% 
  slice_max(distance, with_ties = FALSE) %>% 
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_farthestLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>% 
  unnest(gene) %>% group_by(gene) %>%
  slice_max(distance, with_ties = FALSE) %>% 
  summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
                              ifelse(gene %in% c(), "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)


p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("dTAG") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_farthestLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

#### A485 Scatterplot
##### P-N
```{r}
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)

# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1)

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_sumScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-n.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

```
##### P-PE
```{r}
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)

# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E"))%>% 
  dplyr::mutate(distance = start2 - start1)

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-pe.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-pe.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_sumScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-pe.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### P-S
```{r}
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)

# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"

diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-S"))%>% 
  dplyr::mutate(distance = start2 - start1)

# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id


# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-s.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_avgScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-s.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_sumScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4

temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
                              ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag)

fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-s.tsv"), sep = "\t")

p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag, 
                      label = ifelse(flag != "0NO", external_gene_name, NA),
                      shape = as.factor(maxFlag))) +
  geom_point() + geom_text_repel() + theme_classic() + 
  ggtitle("A485") +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = diffCutoff, linetype = "dashed") +
  geom_hline(yintercept = - diffCutoff, linetype = "dashed") + 
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = fcCutoff, linetype = "dashed") +
  geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
  scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) +  # Corrected color mapping
  scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))

fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


## [2.11] How many loops per gene?
```{r}
# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))


# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>%  summarize(
    peak = list(peakID),
    anno2 = list(Anno2),
    count = n())

ggplot(tempSum, aes(x = count)) + geom_histogram(binwidth = 1) + theme_classic() + 
  ggtitle("# of P-N loops for each gene") + scale_y_log10()

###################################################################################
```

## [2.11.2] SuperEnhancer?
```{r}
# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))

tempSE <- geneAnnoData %>% unnest(gene) %>% group_by(gene) %>% summarize(SE = ifelse(any(AnnoSE == "SE"), 1, 0))


group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


tempSE <- tempSE %>% rowwise() %>% dplyr::mutate(
  group = ifelse(gene %in% group1, "group1",
                 ifelse(gene %in% group2, "group2",
                        ifelse(gene %in% group5, "group5",
                               ifelse(gene %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))

perc.group1 <- nrow(tempSE %>% dplyr::filter(group == "group1", SE == 1))/
  nrow(tempSE %>% dplyr::filter(group == "group1"))

perc.group2 <- nrow(tempSE %>% dplyr::filter(group == "group2", SE == 1))/
  nrow(tempSE %>% dplyr::filter(group == "group2"))

perc.group5 <- nrow(tempSE %>% dplyr::filter(group == "group5", SE == 1))/
  nrow(tempSE %>% dplyr::filter(group == "group5"))

perc.group8 <- nrow(tempSE %>% dplyr::filter(group == "group8", SE == 1))/
  nrow(tempSE %>% dplyr::filter(group == "group8"))

tempPlot <- tibble(group = c("group1", "group2", "group5", "group8"),
                   perc = c(perc.group1, perc.group2, perc.group5, perc.group8))

ggplot(tempPlot, aes(x = group, y = perc)) + geom_point() + theme_classic() + ylim(0, 0.2)
```


```{r}
plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO")



```


## [2.12] How many loops/distance per gene in each quadrant?
#### dTAG with binary grouping
##### Splitting genes to group
```{r}
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))


## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


temp <- temp %>%
  dplyr::mutate(group = ifelse(padj < 0.05 & shrinked_log2FC < 0, 1, 
                               ifelse(mean_diff_score < 0, 2, 3)))



fcCutoff <- 0.5
diffCutoff <- 0.2

gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"), sep = "\t")


# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2", NA))
)


```
##### - GO for each group

```{r}

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE)
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE)
GO1.df <- as.data.frame(GO1)
GO2.df <- as.data.frame(GO2)
# fwrite(GO1.df, here("GO_binary_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_binary_group2.tsv"), sep = "\t")
GO1.df <- fread(here("GO_binary_group1.tsv"))
GO2.df <- fread(here("GO_binary_group2.tsv"))

subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
  dplyr::arrange(p.adjust)
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))

subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
  dplyr::arrange(p.adjust)
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))


GOlist <- factor(c("GO:0001824", "GO:0030879", "GO:0021953", "GO:0019827", 
                 "GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055", 
                 "GO:0006397", "GO:0030900", "GO:0008380"))

data <- bind_rows(subset1, subset2) %>%
  dplyr::filter(ID %in% GOlist)

descOrder <- sort(unique(data$Description))[c(4, 10, 2, 1, 3, 11, 7, 8, 6, 5, 9)]
pValueLogMax <- 10
data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) + 
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM) + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      # limits = c(0, 1),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL) +
  theme_bw() +  # Apply theme_bw first, so custom theme settings come after
  theme(
    panel.background = element_rect(fill = "transparent"),  # Override theme_bw panel
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      size = fontSizeS,         # Ensure size is set for x-axis text
      family = fontType,
      color = "#000000",
    ),
    axis.text.y = element_text(
      size = fontSizeS,         # Ensure size is set for y-axis text
      family = fontType,
      color = "#000000",
      lineheight = 0.9          # Allows wrapping for y-axis labels to fit into 2 lines
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )
  


fileName <- here(figDir, "..", "GO", "GO_groups_binaryGrouping")
width <- panelSize(2.5)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
```


##### - Average loop size
```{r}
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)

temp <- temp %>% dplyr::filter(group %in% c(1, 2))


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
  distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv12 <- round(getPvalWilcox(temp, 1, 2), 5)



p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")+
  annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12),
           color = "black", hjust = 0, size = 3)
  
  
fileName <- paste0("size_barplot_diffGroup_dTAG_vs_DMSO_binaryGroup")
height <- 3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-N
```{r}
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>%  summarize(
    peak = list(peakID),
    anno2 = list(Anno2),
    count = n())

tempSum <- tempSum %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2", NA))
) %>% dplyr::filter(!is.na(group)) %>% dplyr::filter(group %in% c("group1", "group2"))

# 
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$count
  distance2 <- (data %>% dplyr::filter(group ==group2) )$count
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)


p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
                 linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  labs(x = NULL, y = "# of P-N loops per gene") +
  coord_cartesian(ylim = c(0, 8)) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  )+
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
           color = "black", hjust = 0, size = 1) +
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Counting loop types
```{r}
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
                                 num_pp = sum(anno2 == "P-P"),
                                 num_pe = sum(anno2 == "P-E"),
                                 num_ps = sum(anno2 == "P-S"),
                                 num_px = sum(anno2 == "P-X"),
                                 ratio_reg = (num_pp + num_pe)/total,
                                 ratio_str = num_ps/total)

#saveRDS(temp2, here(resultDir, "gene_loop_link_A485.rds"))


loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
                                        num_pe = sum(num_pe),
                                        num_ps = sum(num_ps),
                                        num_px = sum(num_px))


loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")

loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))

# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

```

##### - Loop number per gene: P-P
```{r}
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)


p <- ggplot(data, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
                 linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  labs(x = NULL, y = "# of P-P loops per gene") +
  coord_cartesian(ylim = c(0, 4)) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  )+
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
           color = "black", hjust = 0, size = 1) +
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )

fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pp_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-E
```{r}
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)


p <- ggplot(data, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
                 linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  labs(x = NULL, y = "# of P-E loops per gene") +
  coord_cartesian(ylim = c(0, 4)) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  )+
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
           color = "black", hjust = 0, size = 1) +
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )



fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pe_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-S
```{r}
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)


p <- ggplot(data, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
                 linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  labs(x = NULL, y = "# of P-S loops per gene") +
  coord_cartesian(ylim = c(0, 4)) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  )+
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
           color = "black", hjust = 0, size = 1) +
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )



fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_ps_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

#### dTAG
##### - Splitting genes to groups
```{r}
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


fcCutoff <- 0.5
diffCutoff <- 0.2
# 
temp <- temp %>%
  dplyr::mutate(group = ifelse(mean_diff_score < -diffCutoff,
                               ifelse(shrinked_log2FC < -fcCutoff, 1,
                                      ifelse(shrinked_log2FC <fcCutoff, 2, 3)),
                               ifelse(mean_diff_score < diffCutoff,
                                      ifelse(shrinked_log2FC < -fcCutoff, 4,
                                             ifelse(shrinked_log2FC < fcCutoff, 5, 6)),
                                      ifelse(shrinked_log2FC < -fcCutoff, 7,
                                             ifelse(shrinked_log2FC < fcCutoff, 8, 9)))))


gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
gene.group3 <- (temp %>% dplyr::filter(group == 3))$gene
gene.group4 <- (temp %>% dplyr::filter(group == 4))$gene
gene.group5 <- (temp %>% dplyr::filter(group == 5))$gene
gene.group6 <- (temp %>% dplyr::filter(group == 6))$gene
gene.group7 <- (temp %>% dplyr::filter(group == 7))$gene
gene.group8 <- (temp %>% dplyr::filter(group == 8))$gene
gene.group9 <- (temp %>% dplyr::filter(group == 9))$gene

fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 3)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group3.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 4)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group4.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 5)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 6)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group6.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 7)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group7.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 8)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 9)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group9.tsv"), sep = "\t")


# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
)


```
##### - GO for each group

```{r}
GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO5 <- enrichGO(gene = gene.group5, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO8 <- enrichGO(gene = gene.group8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")

GO1.df <- as.data.frame(GO1)
GO2.df <- as.data.frame(GO2)
GO3.df <- as.data.frame(GO5)
GO4.df <- as.data.frame(GO8)

subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "group1") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset2 <- GO2.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group2") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset3 <- GO3.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group3") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))

GOlist <- factor(c("GO:0033002", "GO:0070373", "GO:0048730", "GO:0031103",
                 "GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055", 
                 "GO:0006397", "GO:0030900", "GO:0008380"))

data <- bind_rows(bind_rows(subset1, subset2), subset3) %>%
  dplyr::filter(ID %in% GOlist)

descOrder <- sort(unique(data$Description))[c(1, 2, 5, 7,
                                 3, 4, 6, 8, 9, 10, 11)]

p <- ggplot(data, aes(x = group, y = factor(Description, level = descOrder), color = p.adjust, size = gr)) + 
  geom_point() + theme_bw() +
  scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
  scale_size_continuous(range = c(0, 3)) +
  labs(x = NULL, y = NULL) +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- here(figDir, "..", "GO", "GO_groups")
height = 2
width = 3.5
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
```



##### - Average loop size
```{r}
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)

#
temp <- temp %>% dplyr::filter(group %in% c(1, 2, 5, 8))


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
  distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv12 <- round(getPvalWilcox(temp, 1, 2), 5)
pv15 <- round(getPvalWilcox(temp, 1, 5), 5)
pv18 <- round(getPvalWilcox(temp, 1, 8), 5)
pv25 <- round(getPvalWilcox(temp, 2, 5), 5)
pv28 <- round(getPvalWilcox(temp, 2, 8), 5)
pv58 <- round(getPvalWilcox(temp, 5, 8), 5)


p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12, "\n",
                                                      "pv15: ", pv15, "\n",
                                                      "pv18: ", pv18, "\n",
                                                      "pv25: ", pv25, "\n",
                                                      "pv28: ", pv28, "\n",
                                                      "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 3)
  
  
fileName <- paste0("size_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-N
```{r}
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>%  summarize(
    peak = list(peakID),
    anno2 = list(Anno2),
    count = n())

tempSum <- tempSum %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
) %>%
  dplyr::filter(group %in% c("group1", "group2", "group5", "group8"))


# TEMP START
#geneList.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))
#geneList.group1.temp <- geneList.group1 %>% dplyr::left_join(tempSum, by = c("gene"))
# TEMP END

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$count
  distance2 <- (data %>% dplyr::filter(group ==group2) )$count
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)


p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0, 10)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1) +  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Counting loop types
```{r}
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
                                 num_pp = sum(anno2 == "P-P"),
                                 num_pe = sum(anno2 == "P-E"),
                                 num_ps = sum(anno2 == "P-S"),
                                 num_px = sum(anno2 == "P-X"),
                                 ratio_reg = (num_pp + num_pe)/total,
                                 ratio_str = num_ps/total)

saveRDS(temp2, here(resultDir, "gene_loop_link.rds"))


loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
                                        num_pe = sum(num_pe),
                                        num_ps = sum(num_ps),
                                        num_px = sum(num_px))


loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")

loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))

# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

```

##### - Loop number per gene: P-P
```{r}
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1)+  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pp")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-E
```{r}
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1)+  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pe")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-S
```{r}
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1)+  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_ps")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-X
```{r}
########
#P-X
data <- temp2 %>% dplyr::select(group, num_px)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1)+  theme(legend.position = "none")


fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_px")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

#### A485
##### - Splitting genes to groups
```{r}
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)


temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


fcCutoff <- 0.5
diffCutoff <- 0.2
# 
temp <- temp %>%
  dplyr::mutate(group = ifelse(mean_diff_score < -diffCutoff,
                               ifelse(shrinked_log2FC < -fcCutoff, 1,
                                      ifelse(shrinked_log2FC <fcCutoff, 2, 3)),
                               ifelse(mean_diff_score < diffCutoff,
                                      ifelse(shrinked_log2FC < -fcCutoff, 4,
                                             ifelse(shrinked_log2FC < fcCutoff, 5, 6)),
                                      ifelse(shrinked_log2FC < -fcCutoff, 7,
                                             ifelse(shrinked_log2FC < fcCutoff, 8, 9)))))


gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
gene.group3 <- (temp %>% dplyr::filter(group == 3))$gene
gene.group4 <- (temp %>% dplyr::filter(group == 4))$gene
gene.group5 <- (temp %>% dplyr::filter(group == 5))$gene
gene.group6 <- (temp %>% dplyr::filter(group == 6))$gene
gene.group7 <- (temp %>% dplyr::filter(group == 7))$gene
gene.group8 <- (temp %>% dplyr::filter(group == 8))$gene
gene.group9 <- (temp %>% dplyr::filter(group == 9))$gene

fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group2.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 3)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group3.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 4)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group4.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 5)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group5.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 6)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group6.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 7)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group7.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 8)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group8.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 9)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group9.tsv"), sep = "\t")


# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
)


```
##### - GO for each group

```{r}
GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO3 <- enrichGO(gene = gene.group3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO4 <- enrichGO(gene = gene.group4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO5 <- enrichGO(gene = gene.group5, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO6 <- enrichGO(gene = gene.group6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO7 <- enrichGO(gene = gene.group7, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO8 <- enrichGO(gene = gene.group8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO9 <- enrichGO(gene = gene.group9, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")

dotplot(GO1, showCategory = 10)

# 
# GO1.df <- as.data.frame(GO1)
# GO2.df <- as.data.frame(GO2)
# GO3.df <- as.data.frame(GO5)
# GO4.df <- as.data.frame(GO8)
# 
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "group1") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group3") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# GOlist <- factor(c("GO:0033002", "GO:0070373", "GO:0048730", "GO:0031103",
#                  "GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055", 
#                  "GO:0006397", "GO:0030900", "GO:0008380"))
# 
# data <- bind_rows(bind_rows(subset1, subset2), subset3) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# descOrder <- sort(unique(data$Description))[c(1, 2, 5, 7,
#                                  3, 4, 6, 8, 9, 10, 11)]
# 
# p <- ggplot(data, aes(x = group, y = factor(Description, level = descOrder), color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups")
# height = 2
# width = 3.5
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
```



##### - Average loop size
```{r}
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)


# 
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
#   distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# pv12 <- round(getPvalWilcox(temp, 1, 2), 5)
# pv15 <- round(getPvalWilcox(temp, 1, 5), 5)
# pv18 <- round(getPvalWilcox(temp, 1, 8), 5)
# pv25 <- round(getPvalWilcox(temp, 2, 5), 5)
# pv28 <- round(getPvalWilcox(temp, 2, 8), 5)
# pv58 <- round(getPvalWilcox(temp, 5, 8), 5)


p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
  # annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12, "\n",
  #                                                     "pv15: ", pv15, "\n",
  #                                                     "pv18: ", pv18, "\n",
  #                                                     "pv25: ", pv25, "\n",
  #                                                     "pv28: ", pv28, "\n",
  #                                                     "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 3)
  # 
  
fileName <- paste0("size_barplot_diffGroup_A485_vs_DMSO")
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-N
```{r}
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>%  summarize(
    peak = list(peakID),
    anno2 = list(Anno2),
    count = n())

tempSum <- tempSum %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
) %>% dplyr::filter(!is.na(group))

# 
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(group ==group1) )$count
#   distance2 <- (data %>% dplyr::filter(group ==group2) )$count
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# 
# pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)


p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 10, by = 2)) + 
  coord_cartesian(ylim = c(0, 10)) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1) +  


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO")
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Counting loop types
```{r}
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
                                 num_pp = sum(anno2 == "P-P"),
                                 num_pe = sum(anno2 == "P-E"),
                                 num_ps = sum(anno2 == "P-S"),
                                 num_px = sum(anno2 == "P-X"),
                                 ratio_reg = (num_pp + num_pe)/total,
                                 ratio_str = num_ps/total)

saveRDS(temp2, here(resultDir, "gene_loop_link_A485.rds"))


loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
                                        num_pe = sum(num_pe),
                                        num_ps = sum(num_ps),
                                        num_px = sum(num_px))


loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")

loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))

# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

```

##### - Loop number per gene: P-P
```{r}
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1)


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_pp")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-E
```{r}
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1)


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_pe")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-S
```{r}
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1)+  


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_ps")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Loop number per gene: P-X
```{r}
########
#P-X
data <- temp2 %>% dplyr::select(group, num_px)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)

p <- ggplot(data, aes(x = group, y = count, fill = group)) +
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +       
  scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) + 
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  theme(legend.position = "none")
  # annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
  #                                               "pv15: ", pv15, "\n",
  #                                               "pv18: ", pv18, "\n",
  #                                               "pv25: ", pv25, "\n",
  #                                               "pv28: ", pv28, "\n",
  #                                               "pv58: ", pv58, "\n"), 
  #          color = "black", hjust = 0, size = 1)+  


fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_px")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


### dTAG
#### Grouping with P-N number
##### Grouping
```{r}
name <- "chromo_cons_annoHierarchy"

alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
############
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))



# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
resultDir <- here("../../result")

temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))


pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  pnOver = ifelse(gene %in% pnOver8, "pnOver8",
                  ifelse(gene %in% pnOver6, "pnOver6",
                         ifelse(gene %in% pnOver4, "pnOver4",
                                ifelse(gene %in% pnOver2, "pnOver2",
                                       ifelse(gene %in% pnOver0, "pnOver0", NA)))))) %>%
  drop_na(pnOver)
```

##### GO for each group
```{r}
GOfigDir <- here(figDir, "../GO")
getGO("pnOver8", GOfigDir, pnOver8)
getGO("pnOver6", GOfigDir, pnOver6)
getGO("pnOver4", GOfigDir, pnOver4)
getGO("pnOver2", GOfigDir, pnOver2)
getGO("pnOver0", GOfigDir, pnOver0)

#####################
GO0.df <- as.data.frame(enrichGO(gene = pnOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO2.df <- as.data.frame(enrichGO(gene = pnOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO4.df <- as.data.frame(enrichGO(gene = pnOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO6.df <- as.data.frame(enrichGO(gene = pnOver6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO8.df <- as.data.frame(enrichGO(gene = pnOver8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))


subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver0") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver2") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver4") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset6 <- GO6.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver6") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))
subset8 <- GO8.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver8") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))


GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
                   "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
                   "GO:0072001", "GO:0007517", "GO:0048705"))

data <- bind_rows(bind_rows(bind_rows(subset0, subset2), subset4), subset8) %>%
  dplyr::filter(ID %in% GOlist)

p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
  geom_point() + theme_bw() +
  scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
  scale_size_continuous(range = c(0, 3)) +
  labs(x = NULL, y = NULL) +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- here(figDir, "..", "GO", "GO_groups_pn")
height = 2
width = 3.3
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
```


##### loop score
```{r}
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv02 <- round(getPvalWilcox(temp, "pnOver0", "pnOver2"), 5)
pv24 <- round(getPvalWilcox(temp, "pnOver2", "pnOver4"), 5)
pv46 <- round(getPvalWilcox(temp, "pnOver4", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(temp, "pnOver0", "pnOver8"), 5)
pv28 <- round(getPvalWilcox(temp, "pnOver2", "pnOver8"), 5)
pv48 <- round(getPvalWilcox(temp, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(temp, "pnOver6", "pnOver8"), 5)


p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0.2)+
  annotate("text", x = 1, y = 0, label = paste0("pv02: ", convPvalue(pv02), "\n",
                                                "pv24: ", convPvalue(pv24), "\n",
                                                "pv46: ", convPvalue(pv46), "\n",
                                                "pv68: ", convPvalue(pv68), "\n",
                                                "pv48: ", convPvalue(pv48), "\n",
                                                "pv28: ", convPvalue(pv28), "\n",
                                                "pv08: ", convPvalue(pv08), "\n"), 
           color = "black", hjust = 0, size = 3)

  
fileName <- paste0("diffScore_barplot_pnGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### log2FC
```{r}
#
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$log2FoldChange
  distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$log2FoldChange
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
 
pv02 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver2"), 5)
pv04 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver4"), 5)
pv06 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver8"), 5)
pv24 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver4"), 5)
pv26 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver6"), 5)
pv28 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver8"), 5)
pv46 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver6"), 5)
pv48 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(tempDown, "pnOver6", "pnOver8"), 5)




p <- ggplot(tempDown, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)+
  annotate("text", x = 1, y = -0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
                                                   "pv04: ", convPvalue(pv04), "\n",
                                                   "pv06: ", convPvalue(pv06), "\n",
                                                   "pv08: ", convPvalue(pv08), "\n",
                                                   "pv24: ", convPvalue(pv24), "\n",
                                                   "pv26: ", convPvalue(pv26), "\n",
                                                   "pv28: ", convPvalue(pv28), "\n",
                                                   "pv46: ", convPvalue(pv46), "\n",
                                                   "pv48: ", convPvalue(pv48), "\n",
                                                   "pv68: ", convPvalue(pv68), "\n"), 
           color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_pnGroup_dTAG_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
pv02 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver2"), 5)
pv04 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver4"), 5)
pv06 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver8"), 5)
pv24 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver4"), 5)
pv26 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver6"), 5)
pv28 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver8"), 5)
pv46 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver6"), 5)
pv48 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(tempUp, "pnOver6", "pnOver8"), 5)




p <- ggplot(tempUp, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)+
  annotate("text", x = 1, y = 0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
                                                   "pv04: ", convPvalue(pv04), "\n",
                                                   "pv06: ", convPvalue(pv06), "\n",
                                                   "pv08: ", convPvalue(pv08), "\n",
                                                   "pv24: ", convPvalue(pv24), "\n",
                                                   "pv26: ", convPvalue(pv26), "\n",
                                                   "pv28: ", convPvalue(pv28), "\n",
                                                   "pv46: ", convPvalue(pv46), "\n",
                                                   "pv48: ", convPvalue(pv48), "\n",
                                                   "pv68: ", convPvalue(pv68), "\n"), 
           color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_pnGroup_dTAG_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

#### Grouping with P-S number
##### Grouping
```{r}
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  psOver = ifelse(gene %in% psOver4, "psOver4",
                  ifelse(gene %in% psOver3, "psOver3",
                         ifelse(gene %in% psOver2, "psOver2",
                                ifelse(gene %in% psOver1, "psOver1",
                                       ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
  drop_na(psOver)

```

##### GO for each group
```{r}
GOfigDir <- here(figDir, "../GO")

#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))

fwrite(GO0.df, here("GO_PS_group0.tsv"), sep = "\t")
fwrite(GO1.df, here("GO_PS_group1.tsv"), sep = "\t")
fwrite(GO2.df, here("GO_PS_group2.tsv"), sep = "\t")
fwrite(GO3.df, here("GO_PS_group3.tsv"), sep = "\t")
fwrite(GO4.df, here("GO_PS_group4.tsv"), sep = "\t")

GO0.df <- fread(here("GO_PS_group0.tsv"))
GO1.df <- fread(here("GO_PS_group1.tsv"))
GO2.df <- fread(here("GO_PS_group2.tsv"))
GO3.df <- fread(here("GO_PS_group3.tsv"))
GO4.df <- fread(here("GO_PS_group4.tsv"))

subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
  dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
  dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
  dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
  dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
  dplyr::arrange(p.adjust)

subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))


GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
                   "GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664"))

data <- bind_rows(bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset3), subset4) %>%
  dplyr::filter(ID %in% GOlist)

descOrder <- sort(unique(data$Description))[rev(c(10, 4, 5, 9, 11,
                                                  2, 6,
                                                  3, 1, 8, 7))]

data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) + 
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM) + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      # limits = c(0, 1),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL) +
  theme_bw() +  # Apply theme_bw first, so custom theme settings come after
  theme(
    panel.background = element_rect(fill = "transparent"),  # Override theme_bw panel
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      size = fontSizeS,         # Ensure size is set for x-axis text
      family = fontType,
      color = "#000000",
    ),
    axis.text.y = element_text(
      size = fontSizeS,         # Ensure size is set for y-axis text
      family = fontType,
      color = "#000000",
      lineheight = 0.9          # Allows wrapping for y-axis labels to fit into 2 lines
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "..", "GO", "GO_groups_ps")
width <- panelSize(2.7)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
```

##### loop score
```{r}
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)


p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + 
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineMedium * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) +
    scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
  coord_cartesian(ylim = c(-0.5, 0.1))



fileName <- paste0("diffScore_barplot_psGroup_dTAG_vs_DMSO_2")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### log2FC
```{r}
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))

ks_result1 <- ks.test(
  temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
  temp %>% dplyr::filter(psOver == "psOver3") %>% pull(absLog2FC)
)
ks_result2 <- ks.test(
  temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
  temp %>% dplyr::filter(psOver == "psOver2") %>% pull(absLog2FC)
)
ks_result3 <- ks.test(
  temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
  temp %>% dplyr::filter(psOver == "psOver1") %>% pull(absLog2FC)
)
ks_result4 <- ks.test(
  temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
  temp %>% dplyr::filter(psOver == "psOver0") %>% pull(absLog2FC)
)

p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_psGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()




#
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
 
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)

ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
  
fileName <- paste0("log2FC_barplot_psGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)




p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_psGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


#### Grouping with P-E number
##### Grouping
```{r}
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

psOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  psOver = ifelse(gene %in% psOver4, "psOver4",
                  ifelse(gene %in% psOver3, "psOver3",
                         ifelse(gene %in% psOver2, "psOver2",
                                ifelse(gene %in% psOver1, "psOver1",
                                       ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
  drop_na(psOver)

```

##### GO for each group
```{r}
GOfigDir <- here(figDir, "../GO")
#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))

# fwrite(GO0.df, here("GO_PE_group0.tsv"), sep = "\t")
# fwrite(GO1.df, here("GO_PE_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_PE_group2.tsv"), sep = "\t")
# fwrite(GO3.df, here("GO_PE_group3.tsv"), sep = "\t")
# fwrite(GO4.df, here("GO_PE_group4.tsv"), sep = "\t")


subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
  dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
  dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
  dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
  dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
  dplyr::arrange(p.adjust)

subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))

GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
                   "GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664",
                   "GO:0040029"))

data <- bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset4) %>%
  dplyr::filter(ID %in% GOlist)

descOrder <- sort(unique(data$Description))[rev(c(11, 5, 6, 10, 12,
                                                  1,
                                                  3, 7, 4, 2, 9, 8))]

empty_row3 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 3"                   # Group to add as empty column
)

empty_row4 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 4"                   # Group to add as empty column
)

# Append the empty row to your dataset
data <- rbind(data, empty_row3, empty_row4)


data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) + 
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM) + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      # limits = c(0, 1),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL) +
  theme_bw() +  # Apply theme_bw first, so custom theme settings come after
  theme(
    panel.background = element_rect(fill = "transparent"),  # Override theme_bw panel
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      size = fontSizeS,         # Ensure size is set for x-axis text
      family = fontType,
      color = "#000000",
    ),
    axis.text.y = element_text(
      size = fontSizeS,         # Ensure size is set for y-axis text
      family = fontType,
      color = "#000000",
      lineheight = 0.9          # Allows wrapping for y-axis labels to fit into 2 lines
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "..", "GO", "GO_groups_pe")
width <- panelSize(2.65)*mmToInch
height <- panelSize(1.3)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
```
##### loop score
```{r}
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)


p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + 
    scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineMedium * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) +
  coord_cartesian(ylim = c(-0.5, 0.1))


fileName <- paste0("diffScore_barplot_peGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### log2FC
```{r}
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_peGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


#
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
 
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)

ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
  
fileName <- paste0("log2FC_barplot_peGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)




p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_peGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


#### Grouping with P-P number
##### Grouping
```{r}
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

psOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  psOver = ifelse(gene %in% psOver4, "psOver4",
                  ifelse(gene %in% psOver3, "psOver3",
                         ifelse(gene %in% psOver2, "psOver2",
                                ifelse(gene %in% psOver1, "psOver1",
                                       ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
  drop_na(psOver)


```
##### GO for each group
```{r}
GOfigDir <- here(figDir, "../GO")

#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
# 
# fwrite(GO0.df, here("GO_PP_group0.tsv"), sep = "\t")
# fwrite(GO1.df, here("GO_PP_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_PP_group2.tsv"), sep = "\t")
# fwrite(GO3.df, here("GO_PP_group3.tsv"), sep = "\t")
# fwrite(GO4.df, here("GO_PP_group4.tsv"), sep = "\t")

subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
  dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
  dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
  dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
  dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
  dplyr::arrange(p.adjust)

subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))

GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
                   "GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664"))

data <- bind_rows(subset0, subset1) %>%
  dplyr::filter(ID %in% GOlist)



empty_row2 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 2"                   # Group to add as empty column
)

empty_row3 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 3"                   # Group to add as empty column
)

empty_row4 <- data.frame(
  ID = NA,                             # No specific ID
  Description = NA,                    # No description
  GeneRatio = NA,                      # No gene ratio
  p.adjust = NA,                       # No p.adjust value
  group = "Group 4"                   # Group to add as empty column
)

# Append the empty row to your dataset
data <- rbind(data, empty_row2, empty_row3, empty_row4)
descOrder <- sort(unique(data$Description))[rev(c(12,
                                                  4, 5, 9, 11, 10, 
                                                  2, 6, 3, 1, 8, 7))]
data <- data %>% dplyr::filter(Description %in% descOrder)


data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) + 
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM) + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      # limits = c(0, 1),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL) +
  theme_bw() +  # Apply theme_bw first, so custom theme settings come after
  theme(
    panel.background = element_rect(fill = "transparent"),  # Override theme_bw panel
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      size = fontSizeS,         # Ensure size is set for x-axis text
      family = fontType,
      color = "#000000",
    ),
    axis.text.y = element_text(
      size = fontSizeS,         # Ensure size is set for y-axis text
      family = fontType,
      color = "#000000",
      lineheight = 0.9          # Allows wrapping for y-axis labels to fit into 2 lines
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick * mmToLineUnit,
      lineend = "square"
    ),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "..", "GO", "GO_groups_pp")
width <- panelSize(2.65)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
```

##### loop score
```{r}
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)

p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + 
    scale_fill_manual(values = rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineMedium * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) +
  coord_cartesian(ylim = c(-0.5, 0.1))


fileName <- paste0("diffScore_barplot_ppGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

```
##### log2FC
```{r}
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_ppGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



#
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
  distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
 
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)

ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
  
fileName <- paste0("log2FC_barplot_ppGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)




p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + 
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
  geom_violin(aes(fill = psOver), 
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
  stat_summary(
    aes(group = psOver), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_ppGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

### A485
#### Grouping with P-N number
##### Grouping
```{r}
############
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))


# temp2
#calculating diff score and log2fc distribution based on p-n numbers
temp2 <- readRDS(here(resultDir, "gene_loop_link_A485.rds"))

pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  pnOver = ifelse(gene %in% pnOver8, "pnOver8",
                  ifelse(gene %in% pnOver6, "pnOver6",
                         ifelse(gene %in% pnOver4, "pnOver4",
                                ifelse(gene %in% pnOver2, "pnOver2",
                                       ifelse(gene %in% pnOver0, "pnOver0", NA)))))) %>%
  drop_na(pnOver)
```

##### (SKIP) GO for each group
```{r}
# GOfigDir <- here(figDir, "../GO")
# getGO("pnOver8", GOfigDir, pnOver8)
# getGO("pnOver6", GOfigDir, pnOver6)
# getGO("pnOver4", GOfigDir, pnOver4)
# getGO("pnOver2", GOfigDir, pnOver2)
# getGO("pnOver0", GOfigDir, pnOver0)
# 
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = pnOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = pnOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = pnOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO6.df <- as.data.frame(enrichGO(gene = pnOver6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO8.df <- as.data.frame(enrichGO(gene = pnOver8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# 
# 
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver0") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver4") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset6 <- GO6.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver6") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset8 <- GO8.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver8") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# 
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
#                    "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
#                    "GO:0072001", "GO:0007517", "GO:0048705"))
# 
# data <- bind_rows(bind_rows(bind_rows(subset0, subset2), subset4), subset8) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups_pn")
# height = 2
# width = 3.3
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
```


##### loop score
```{r}
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
#   distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# pv02 <- round(getPvalWilcox(temp, "pnOver0", "pnOver2"), 5)
# pv24 <- round(getPvalWilcox(temp, "pnOver2", "pnOver4"), 5)
# pv46 <- round(getPvalWilcox(temp, "pnOver4", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(temp, "pnOver0", "pnOver8"), 5)
# pv28 <- round(getPvalWilcox(temp, "pnOver2", "pnOver8"), 5)
# pv48 <- round(getPvalWilcox(temp, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(temp, "pnOver6", "pnOver8"), 5)


p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0, label = paste0("pv02: ", convPvalue(pv02), "\n",
  #                                               "pv24: ", convPvalue(pv24), "\n",
  #                                               "pv46: ", convPvalue(pv46), "\n",
  #                                               "pv68: ", convPvalue(pv68), "\n",
  #                                               "pv48: ", convPvalue(pv48), "\n",
  #                                               "pv28: ", convPvalue(pv28), "\n",
  #                                               "pv08: ", convPvalue(pv08), "\n"), 
  #          color = "black", hjust = 0, size = 3)

   
fileName <- paste0("diffScore_barplot_pnGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### log2FC
```{r}
#
#  getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$log2FoldChange
#   distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$log2FoldChange
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#  
# pv02 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver2"), 5)
# pv04 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver4"), 5)
# pv06 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver8"), 5)
# pv24 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver4"), 5)
# pv26 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver6"), 5)
# pv28 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver8"), 5)
# pv46 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver6"), 5)
# pv48 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(tempDown, "pnOver6", "pnOver8"), 5)




p <- ggplot(tempDown, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = -0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
  #                                                  "pv04: ", convPvalue(pv04), "\n",
  #                                                  "pv06: ", convPvalue(pv06), "\n",
  #                                                  "pv08: ", convPvalue(pv08), "\n",
  #                                                  "pv24: ", convPvalue(pv24), "\n",
  #                                                  "pv26: ", convPvalue(pv26), "\n",
  #                                                  "pv28: ", convPvalue(pv28), "\n",
  #                                                  "pv46: ", convPvalue(pv46), "\n",
  #                                                  "pv48: ", convPvalue(pv48), "\n",
  #                                                  "pv68: ", convPvalue(pv68), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_pnGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#  
# pv02 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver2"), 5)
# pv04 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver4"), 5)
# pv06 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver8"), 5)
# pv24 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver4"), 5)
# pv26 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver6"), 5)
# pv28 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver8"), 5)
# pv46 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver6"), 5)
# pv48 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(tempUp, "pnOver6", "pnOver8"), 5)
# 



p <- ggplot(tempUp, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
  #                                                  "pv04: ", convPvalue(pv04), "\n",
  #                                                  "pv06: ", convPvalue(pv06), "\n",
  #                                                  "pv08: ", convPvalue(pv08), "\n",
  #                                                  "pv24: ", convPvalue(pv24), "\n",
  #                                                  "pv26: ", convPvalue(pv26), "\n",
  #                                                  "pv28: ", convPvalue(pv28), "\n",
  #                                                  "pv46: ", convPvalue(pv46), "\n",
  #                                                  "pv48: ", convPvalue(pv48), "\n",
  #                                                  "pv68: ", convPvalue(pv68), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_pnGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

#### Grouping with P-S number
##### Grouping
```{r}
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene




## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


temp <- temp %>% dplyr::mutate(
  psOver = ifelse(gene %in% psOver4, "psOver4",
                  ifelse(gene %in% psOver3, "psOver3",
                         ifelse(gene %in% psOver2, "psOver2",
                                ifelse(gene %in% psOver1, "psOver1",
                                       ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
  drop_na(psOver)

```

##### (SKIP) GO for each group
```{r}
# GOfigDir <- here(figDir, "../GO")
# getGO("psOver4", GOfigDir, psOver4)
# getGO("psOver3", GOfigDir, psOver3)
# getGO("psOver2", GOfigDir, psOver2)
# getGO("psOver1", GOfigDir, psOver1)
# getGO("psOver0", GOfigDir, psOver0)
# 
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# 
# 
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# 
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
#                    "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
#                    "GO:0072001", "GO:0007517", "GO:0048705", "GO:0003002"))
# 
# data <- bind_rows(bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset3), subset4) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups_ps")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
```

##### loop score
```{r}
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
# 

p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n", 
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3)

  
fileName <- paste0("diffScore_barplot_psGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### log2FC
```{r}
#
#  getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#  
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
# 
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_psGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#  
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
# 



p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_psGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


#### Grouping with P-E number
##### Grouping
```{r}
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers

peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


temp <- temp %>% dplyr::mutate(
  peOver = ifelse(gene %in% peOver4, "peOver4",
                  ifelse(gene %in% peOver3, "peOver3",
                         ifelse(gene %in% peOver2, "peOver2",
                                ifelse(gene %in% peOver1, "peOver1",
                                       ifelse(gene %in% peOver0, "peOver0", NA)))))) %>%
  drop_na(peOver)

```

##### (SKIP) GO for each group
```{r}
# GOfigDir <- here(figDir, "../GO")
# getGO("peOver4", GOfigDir, psOver4)
# getGO("peOver3", GOfigDir, psOver3)
# getGO("peOver2", GOfigDir, psOver2)
# getGO("peOver1", GOfigDir, psOver1)
# getGO("peOver0", GOfigDir, psOver0)
# 
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# 
# 
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# 
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
#                    "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
#                    "GO:0072001", "GO:0007517", "GO:0048705", 
#                    "GO:0040029", "GO:0010165"))
# 
# data <- bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset4) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups_pe")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
```
##### loop score
```{r}
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)


p <- ggplot(temp, aes(x = peOver, y = mean_diff_score)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ",convPvalue( ps14), "\n",
  #                                               "ps04: ",convPvalue( ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3)

  
fileName <- paste0("diffScore_barplot_peGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### log2FC
```{r}
#
#  getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#  
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)


p <- ggplot(tempDown, aes(x = peOver, y = log2FoldChange)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_peGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
 
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
# 




p <- ggplot(tempUp, aes(x = peOver, y = log2FoldChange)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_peGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


#### Grouping with P-P number
##### Grouping
```{r}
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene



## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_A485_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  drop_na(shrinked_log2FC)


temp <- temp %>% dplyr::mutate(
  ppOver = ifelse(gene %in% ppOver4, "ppOver4",
                  ifelse(gene %in% ppOver3, "ppOver3",
                         ifelse(gene %in% ppOver2, "ppOver2",
                                ifelse(gene %in% ppOver1, "ppOver1",
                                       ifelse(gene %in% ppOver0, "ppOver0", NA)))))) %>%
  drop_na(ppOver)
```
##### (SKIP) GO for each group
```{r}
# GOfigDir <- here(figDir, "../GO")
# getGO("ppOver4", GOfigDir, psOver4)
# getGO("ppOver3", GOfigDir, psOver3)
# getGO("ppOver2", GOfigDir, psOver2)
# getGO("ppOver1", GOfigDir, psOver1)
# getGO("ppOver0", GOfigDir, psOver0)
# 
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# 
# 
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
#   dplyr::mutate(
#     gr = sapply(GeneRatio, function(x) {
#       # Split the string by "/"
#       parts <- unlist(strsplit(x, "/"))
#       # Convert to numeric and perform the division
#       as.numeric(parts[1]) / as.numeric(parts[2])
#     })
#   ) %>% dplyr::arrange(desc(gr))
# 
# 
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
#                    "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165", 
#                    "GO:0072001", "GO:0007517", "GO:0048705", "GO:0003002", "GO:0009411"))
# 
# data <- bind_rows(bind_rows(subset0, subset1), subset3) %>%
#   dplyr::filter(ID %in% GOlist)
# 
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
#   geom_point() + theme_bw() +
#   scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
#   scale_size_continuous(range = c(0, 3)) +
#   labs(x = NULL, y = NULL) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- here(figDir, "..", "GO", "GO_groups_pp")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
```

##### loop score
```{r}
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)


p <- ggplot(temp, aes(x = ppOver, y = mean_diff_score)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3)

  
fileName <- paste0("diffScore_barplot_ppGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```
##### log2FC
```{r}
#
#  getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
#   distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }

tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#  
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)



p <- ggplot(tempDown, aes(x = ppOver, y = log2FoldChange)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))

  
fileName <- paste0("log2FC_barplot_ppGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#  
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
# 
# 



p <- ggplot(tempUp, aes(x = ppOver, y = log2FoldChange)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + 
  stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  geom_hline(yintercept = 0)
  # annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
  #                                               "ps12: ", convPvalue(ps12), "\n",
  #                                               "ps23: ", convPvalue(ps23), "\n",
  #                                               "ps34: ", convPvalue(ps34), "\n",
  #                                               "ps24: ", convPvalue(ps24), "\n",
  #                                               "ps14: ", convPvalue(ps14), "\n",
  #                                               "ps04: ", convPvalue(ps04), "\n"), 
  #          color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))

  
fileName <- paste0("log2FC_barplot_ppGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

## [2.13] How to define multiconnected hub using regulatory loops?
```{r}
# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                 diffCutoff = 0.2,
                                 annoList = c("P-P", "P-E", "P-S", "P-X")) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27, 28))

data <- fread(here(consensusDir, paste0(name, ".tsv"))) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27)) %>%
  dplyr::mutate(binSize = end1 - start1,
                adj_start1 = ifelse(binSize == 5000, start1,
                                    ifelse(binSize == 10000, start1,
                                           start1 + 10000)),
                adj_end1 = adj_start1 + 5000,
                adj_start2 = ifelse(binSize == 5000, start2,
                                    ifelse(binSize == 10000, start2,
                                           start2 + 10000)),
                adj_end2 = adj_start2 + 5000,
                anchor1 = paste(chrom1, adj_start1, adj_end1, sep = "_"),
                anchor2 = paste(chrom2, adj_start2, adj_end2, sep = "_"))

# data <- fread(here(consensusDir, paste0(name, ".tsv"))) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27)) %>% 
#   dplyr::mutate(binSize = end1 - start1,
#                 adj_start1 = ifelse(binSize == 5000, start1,
#                                     ifelse(binSize == 10000, start1+5000,
#                                            start1 + 10000)),
#                 adj_end1 = adj_start1 + 5000,
#                 adj_start2 = ifelse(binSize == 5000, start2,
#                                     ifelse(binSize == 10000, start2 + 5000,
#                                            start2 + 10000)),
#                 adj_end2 = adj_start2 + 5000,
#                 anchor1 = paste(chrom1, adj_start1, adj_end1, sep = "_"),
#                 anchor2 = paste(chrom2, adj_start2, adj_end2, sep = "_"))

data.reg <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"))

## Graph
#install.packages("igraph")
library(igraph)

# To be able to construct a graph, each anchor need to have same resolution.
# Shink the size to 10 kb
# If the anchor is 25kb, choose the middle 5kb bin
# If the anchor is 10kb, choose the left 5kb bin

graph <- graph_from_data_frame(data.reg %>% dplyr::select(anchor1, anchor2))
components <- components(graph)
data.reg$hub <- components$membership[data.reg$anchor1]

temp.bedpe <- data.reg %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj.bedpe")), 
       col.names = FALSE,row.names = FALSE, sep = "\t")  

hubNum <- data.reg %>% group_by(hub) %>% summarise(count = n())

# Hub size distribution
p <- ggplot(hubNum, aes(x = count)) + geom_histogram(binwidth  = 1) + theme_classic() + 
  scale_y_log10()

svglite(here(figDir, "hub_size_distribution.svg"), height = 2, width = 2)
print(p)
dev.off()

data.reg <- data.reg %>% dplyr::left_join(hubNum, by = "hub") 
data.reg <- data.reg %>% dplyr::mutate(peakID = paste(chrom1, start1, start2, sep = "_"))

# Exporting regulatory loops with specific hub size
temp.bedpe <- data.reg %>% dplyr::filter(count >= 3) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub3.bedpe")), 
       col.names = FALSE,row.names = FALSE, sep = "\t")  

temp.bedpe <- data.reg %>% dplyr::filter(count >= 5) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub5.bedpe")), 
       col.names = FALSE,row.names = FALSE, sep = "\t")

temp.bedpe <- data.reg %>% dplyr::filter(count >= 10) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub10.bedpe")), 
       col.names = FALSE,row.names = FALSE, sep = "\t")


# Adding gene annotation to hubs
genePeakPair <- geneAnnoData %>% dplyr::mutate(peakID = paste(chrom1, start1, start2, sep = "_")) %>% dplyr::select(peakID, gene)
data.reg <- data.reg %>% dplyr::left_join(genePeakPair, by = "peakID")

# During unnesting, E-N loops without gene annotated are removed
data.reg <- data.reg %>% unnest(gene) %>% dplyr::mutate(
  group = ifelse(gene %in% gene.group1, "group1",
                 ifelse(gene %in% gene.group2, "group2",
                        ifelse(gene %in% gene.group3, "group3",
                               ifelse(gene %in% gene.group4, "group4",
                                      ifelse(gene %in% gene.group5, "group5",
                                             ifelse(gene %in% gene.group6, "group6",
                                                    ifelse(gene %in% gene.group7, "group7",
                                                           ifelse(gene %in% gene.group8, "group8",
                                                                  ifelse(gene %in% gene.group9, "group9", NA)))))))))
)


temp <- data.reg %>% dplyr::select(gene, hub, count, group) %>%
  group_by(gene) %>% slice_max(count, with_ties = FALSE) %>%
  dplyr::filter(group %in% c("group1", "group2", "group5", "group8"))


# TEMP START
# TEMP START
geneList.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))
geneList.group1.temp <- geneList.group1 %>% dplyr::left_join(temp, by = c("gene"))
# TEMP END

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$count
  distance2 <- (data %>% dplyr::filter(group ==group2) )$count
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)


p <- ggplot(temp, aes(x = group, y = count, fill = group)) + 
  geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() + 
    scale_y_continuous(breaks = seq(0, 30, by = 5), limits = c(0, 30)) + 

  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")  +
  annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
                                                "pv15: ", pv15, "\n",
                                                "pv18: ", pv18, "\n",
                                                "pv25: ", pv25, "\n",
                                                "pv28: ", pv28, "\n",
                                                "pv58: ", pv58, "\n"), 
           color = "black", hjust = 0, size = 1) +  theme(legend.position = "none")


fileName <- paste0("countHub_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



```


## [2.14] Looking deeper into TAD and insulation
### Insulation score of TSS?
#### Importing insulation score
```{r}
# Import insulation score calculated with python
resultDir <- here("../../result")
insScore.DMSO <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1DMSO.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.DMSO) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.dTAG <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1dTAG.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.dTAG) <- c("chrom", "start", "end", "insulationScore", "binID")

#insScore <- full_join(insScore.DMSO, insScore.dTAG, by = c("binID")) %>%
#  dplyr::select(binID, chrom.x, start.x, end.x, insulationScore.x, insulationScore.y) 

#colnames(insScore) <- c("binID", "chr", "start", "end", "insulation_score_DMSO", "insulation_scoare_dTAG")

#View(insScore)

```

#### Linking to gene
```{r}
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")

getInsulationScore <- function(chr, TSS, insScore.tb){
  temp.tb <- insScore.tb %>% dplyr::filter(chrom == chr, start < TSS, end > TSS)
  out <- temp.tb$insulationScore
  if(length(out) < 1){
    return(NA)
  }else{
    return(temp.tb$insulationScore)
  }
}

gene.insScore.all <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  log2_insScore_DMSO = getInsulationScore(chr, TSS, insScore.DMSO),
  insScore_DMSO = 2^log2_insScore_DMSO,
  log2_insScore_dTAG = getInsulationScore(chr, TSS, insScore.dTAG),
  insScore_dTAG = 2^log2_insScore_dTAG,
  diff_insScore = insScore_dTAG - insScore_DMSO)


```

#### Checking insulation score of the nearest TAD
```{r}
# Importing TAD boundaries
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               center = (start + end)/2)

findClosestTADBoundary <- function(chrom, TSS, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom) %>%
    dplyr::mutate(distance = abs(center - TSS)) %>%
    slice_min(distance)
  if(nrow(temp) == 1){
      return(temp$center)
  }else{
    return(NA)
  }
}


gene.insScore.all <- gene.insScore.all %>% rowwise() %>%
  dplyr::mutate(closestBoundary = findClosestTADBoundary(chr, TSS, tad_boundary),
                log2_boundaryInsScore_DMSO = getInsulationScore(chr, closestBoundary, insScore.DMSO),
                boundaryInsScore_DMSO = 2^log2_boundaryInsScore_DMSO,
                log2_boundaryInsScore_dTAG = getInsulationScore(chr, closestBoundary, insScore.dTAG),
                boundaryInsScore_dTAG = 2^log2_boundaryInsScore_dTAG,
                diff_boundaryInsScore = boundaryInsScore_dTAG - boundaryInsScore_DMSO)

saveRDS(gene.insScore.all, here(resultDir, "gene.insScore.all.rds"))
gene.insScore.all <- readRDS(here(resultDir, "gene.insScore.all.rds"))
```
##### Plotting for binary groups
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2", NA))) %>% 
  dplyr::filter(!is.na(group))


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_insScore <- function(temp.tb, note, ymin = -1.5, ymax = 0){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = ".", fill = group, y = -score)) + 
    labs(x = NULL, y = "- Insulation score") +  # Remove x-axis title
    scale_fill_manual(values = rev((c("#777777", "#F28E2C")))) +

    introdataviz::geom_split_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square",
                                    alpha = .4) +
    geom_boxplot(width = 0.3, color = "black",
                 linewidth = lineMedium * mmToLineUnit, lineend = "square",
                 outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + theme_classic() +
    stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) +
    
    # 
    # geom_violin(aes(fill = group), color = "black", linewidth = lineThick*mmToLineUnit, lineend = "square", show.legend = FALSE) + 
    # geom_boxplot(width = 0.1,  color = "black", linewidth = lineThick*mmToLineUnit, lineend = "square",
    #              outlier.size = 1, outlier.stroke = NA) +
    # stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 1, fill = "red", color = "black") + 
    geom_hline(yintercept = 0, linewidth = lineThick*mmToLineUnit) +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12)),
             color = "black", hjust = 0, size = 3) +
    theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )+   guides(
      fill = guide_legend(
        keywidth = 0.2,  # Adjust the width of the legend keys
        keyheight = 0.2  # Adjust the height of the legend keys
      ))
      
      
  fileName <- paste0("insulation_score_binaryGroup_", note)
  width <- panelSize(1.55)*mmToInch
height <- panelSize(1.2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_binaryGroup", ymin = -1.1, ymax = -0.5)

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_binaryGroup", ymin =  -1.1, ymax = -0.5)

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_binaryGroup", ymin = -0.3, ymax = 0.1)
```

##### Plotting for groups
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff", ymin = -0.5, ymax = 1)
```

##### Plotting for P-N
```{r}
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% pnOver8, "p8",
                 ifelse(ensembl %in% pnOver6, "p6",
                        ifelse(ensembl %in% pnOver4, "p4",
                               ifelse(ensembl %in% pnOver2, "p2", 
                                      ifelse(ensembl %in% pnOver0, "p0", NA))))))  %>% 
  dplyr::filter(!is.na(group))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p02 <- round(getPvalWilcox(temp.tb, "p0", "p2"), 5)
  p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
  p46 <- round(getPvalWilcox(temp.tb, "p4", "p6"), 5)
  p68 <- round(getPvalWilcox(temp.tb, "p6", "p8"), 5)
  p48 <- round(getPvalWilcox(temp.tb, "p4", "p8"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "p2", "p8"), 5)
  p08 <- round(getPvalWilcox(temp.tb, "p0", "p8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p02: ", convPvalue(p02), "\n",
                                                         "p24: ", convPvalue(p24), "\n",
                                                         "p46: ", convPvalue(p46), "\n",
                                                         "p68: ", convPvalue(p68), "\n",
                                                         "p48: ", convPvalue(p48), "\n",
                                                         "p28: ",convPvalue( p28), "\n",
                                                         "p08: ",convPvalue( p08), "\n"),
             color = "black", hjust = 0, size = 3)  +
    scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-N")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-N")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-N", ymin = -0.5, ymax = 1)

```
##### Plotting for P-S
```{r}
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% psOver4, "p4",
                 ifelse(ensembl %in% psOver3, "p3",
                        ifelse(ensembl %in% psOver2, "p2",
                               ifelse(ensembl %in% psOver1, "p1", 
                                      ifelse(ensembl %in% psOver0, "p0", NA))))))  %>% 
  dplyr::filter(!is.na(group))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
  p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
  p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
  p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
  p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
  p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
  p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
                                                         "p12: ", convPvalue(p12), "\n",
                                                         "p23: ", convPvalue(p23), "\n",
                                                         "p34: ", convPvalue(p34), "\n",
                                                         "p24: ", convPvalue(p24), "\n",
                                                         "p14: ",convPvalue( p14), "\n",
                                                         "p04: ",convPvalue( p04), "\n"), 
             color = "black", hjust = 0, size = 3) +
    scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-S")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-S")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-S", ymin = -0.5, ymax = 1)
```
##### Plotting for P-E
```{r}
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% peOver4, "p4",
                 ifelse(ensembl %in% peOver3, "p3",
                        ifelse(ensembl %in% peOver2, "p2",
                               ifelse(ensembl %in% peOver1, "p1", 
                                      ifelse(ensembl %in% peOver0, "p0", NA))))))  %>% 
  dplyr::filter(!is.na(group))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
  p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
  p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
  p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
  p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
  p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
  p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
                                                         "p12: ", convPvalue(p12), "\n",
                                                         "p23: ", convPvalue(p23), "\n",
                                                         "p34: ", convPvalue(p34), "\n",
                                                         "p24: ", convPvalue(p24), "\n",
                                                         "p14: ",convPvalue( p14), "\n",
                                                         "p04: ",convPvalue( p04), "\n"), 
             color = "black", hjust = 0, size = 3) +
    scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-E")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-E")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-E", ymin = -0.5, ymax = 1)
```
##### Plotting for P-P
```{r}
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene


gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% ppOver4, "p4",
                 ifelse(ensembl %in% ppOver3, "p3",
                        ifelse(ensembl %in% ppOver2, "p2",
                               ifelse(ensembl %in% ppOver1, "p1", 
                                      ifelse(ensembl %in% ppOver0, "p0", NA))))))  %>% 
  dplyr::filter(!is.na(group))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$score
  distance2 <- (data %>% dplyr::filter(group ==group2) )$score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
  p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
  p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
  p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
  p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
  p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
  p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
  p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
                                                         "p12: ", convPvalue(p12), "\n",
                                                         "p23: ", convPvalue(p23), "\n",
                                                         "p34: ", convPvalue(p34), "\n",
                                                         "p24: ", convPvalue(p24), "\n",
                                                         "p14: ",convPvalue( p14), "\n",
                                                         "p04: ",convPvalue( p04), "\n"), 
             color = "black", hjust = 0, size = 3) +
    scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-P")

# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-P")

# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-P", ymin = -0.5, ymax = 1)
```


## [2.15] TAD distance/size
#### Link TAD
```{r}
refDir <- here("../..", "reference")

# Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               center = (start + end)/2)

# Importing TAD
tad <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_G1DMSO.bedpe"))
colnames(tad) <- c("chr1", "start1", "end1", "chr2", "start2", "end2")
tad <- tad %>% dplyr::mutate(tadID = paste(chr1, start1, end1, sep = "_"),
                             tadSize = end1 - start1)

# Importing gene
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")

# Functions
findDistanceToTAD <- function(chrom, TSS, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom) %>%
    dplyr::mutate(distance = abs(center - TSS)) %>% slice_min(distance)
  if(nrow(temp) == 1){
    return(temp$distance)
  }else{
    return(NA)
  }
}

findItsTAD <- function(chrom, TSS, tad){
  temp <- tad %>% dplyr::filter(chr1 == chrom,
                                start1 < TSS,
                                end1 > TSS)
  if(nrow(temp) == 1){
    return(temp$tadID)
  }else{
    return(NA)
  }
}

findTADSize <-function(chrom, TSS, tad){
  temp <- tad %>% dplyr::filter(chr1 == chrom,
                                start1 < TSS,
                                end1 > TSS)
  if(nrow(temp) == 1){
    return(temp$tadSize)
  }else{
    return(NA)
  }
}

gene.TSS.tb <- gene.TSS.tb %>% rowwise() %>%
  dplyr::mutate(distance = findDistanceToTAD(chr, TSS, tad_boundary),
                TAD = findItsTAD(chr, TSS, tad),
                TADsize = findTADSize(chr, TSS, tad))
```

##### dTAG - binaryGroup
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2", NA))) %>% 
  dplyr::filter(!is.na(group))


## Plot distance
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$distance
  distance2 <- (data %>% dplyr::filter(group == group2) )$distance
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_TADdistance <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = ".", fill = group, y = distance)) +
    # Set axis labels (no x-axis title)
    scale_fill_manual(values = ((c("#777777", "#F28E2C")))) +
    labs(x = NULL, y = "Distance from TAD boundary") +
    # Violin plot with black outline, customized line width and end
    introdataviz::geom_split_violin(
     color = "black", alpha = 0.4,
      linewidth = lineMedium * mmToLineUnit, lineend = "square"
    ) +
    # Box plot with customized line width, square end, and outlier size
    geom_boxplot(
      width = 0.3, color = "black", alpha = 0.6,
      linewidth = lineMedium * mmToLineUnit, lineend = "square",
      outlier.shape = NA,  alpha = 0.6, show.legend = FALSE
    ) +
    
    # Mean point in each group
    stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) +
    
    # Horizontal line at y = 0
    geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
    
    # Coordinate limits and custom y-axis labels
    coord_cartesian(ylim = c(ymin, ymax)) +
    scale_y_continuous(labels = label_kb_mb) +
    
    # Annotate p-value text
    annotate(
      "text", x = 1, y = ymin + 1,
      label = paste0("p12: ", convPvalue(p12)),
      color = "black", hjust = 0, size = 3
    ) +
    
    # Theme customization
    theme_classic() +
    theme(
      axis.title = element_text(
        size = fontSizeM, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 45, hjust = 1, vjust = 1
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)) +
    guides(
      fill = guide_legend(
        keywidth = 0.2,  # Adjust the width of the legend keys
        keyheight = 0.2  # Adjust the height of the legend keys
      )
    )
  
  
  fileName <- paste0("distance_to_boundary_", note)
  width <- panelSize(1.55)*mmToInch
  height <- panelSize(1.2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

plot_TADdistance(gene.TSS.tb.plot, "group_binayGroup", ymax = 1000000)


## Plot TAD size
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$TADsize
  distance2 <- (data %>% dplyr::filter(group == group2) )$TADsize
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_TADsize <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = ".", fill = group, y = TADsize)) +
    # Set axis labels (no x-axis title)
    labs(x = NULL, y = "Size of TAD") +
        scale_fill_manual(values = ((c("#777777", "#F28E2C")))) +

    # Violin plot with black outline, customized line width and end
    introdataviz::geom_split_violin(
      color = "black",
      linewidth = lineMedium* mmToLineUnit, lineend = "square", alpha = .4
    ) +
    
    # Box plot with customized line width, square end, and outlier size
    geom_boxplot(
      width = 0.3, color = "black",
      linewidth = lineMedium* mmToLineUnit, lineend = "square",
      outlier.shape = NA,  alpha = 0.6, show.legend = FALSE
    ) +
    
    # Mean point in each group
    stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) +
    
    # Horizontal line at y = 0
    geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
    
    # Coordinate limits and custom y-axis labels
    coord_cartesian(ylim = c(ymin, ymax)) +
    scale_y_continuous(labels = label_kb_mb) +
    
    # Annotate p-value text
    annotate(
      "text", x = 1, y = ymin + 1,
      label = paste0("p12: ", convPvalue(p12)),
      color = "black", hjust = 0, size = 3
    ) +
    
    # Theme customization
  theme_classic() +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) + guides(
      fill = guide_legend(
        keywidth = 0.2,  # Adjust the width of the legend keys
        keyheight = 0.2  # Adjust the height of the legend keys
      ))
  
  
  
  fileName <- paste0("size_of_boundary_", note)
  width <- panelSize(1.55)*mmToInch
  height <- panelSize(1.2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_TADsize(gene.TSS.tb.plot, "group_binaryGroup", ymax = 3e6)
```

##### dTAG - Plotting for groups
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))


## Plot distance
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$distance
  distance2 <- (data %>% dplyr::filter(group == group2) )$distance
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_TADdistance <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = distance)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + scale_y_continuous(labels = label_kb_mb) +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("distance_to_boundary_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

plot_TADdistance(gene.TSS.tb.plot, "group")


## Plot TAD size
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$TADsize
  distance2 <- (data %>% dplyr::filter(group == group2) )$TADsize
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_TADsize <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = TADsize)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + scale_y_continuous(labels = label_kb_mb) +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("size_of_boundary_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_TADsize(gene.TSS.tb.plot, "group", ymax = 6e6)
```

##### A485 - Plotting for groups
```{r}
group1 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group2.tsv"))$gene
group3 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group3.tsv"))$gene
group4 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group4.tsv"))$gene
group5 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group5.tsv"))$gene
group6 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group6.tsv"))$gene
group7 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group7.tsv"))$gene
group8 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group8.tsv"))$gene
group9 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group9.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>%
  rowwise() %>%
  dplyr::mutate(
    group = dplyr::case_when(
      ensembl %in% group1 ~ "group1",
      ensembl %in% group2 ~ "group2",
      ensembl %in% group3 ~ "group3",
      ensembl %in% group4 ~ "group4",
      ensembl %in% group5 ~ "group5",
      ensembl %in% group6 ~ "group6",
      ensembl %in% group7 ~ "group7",
      ensembl %in% group8 ~ "group8",
      ensembl %in% group9 ~ "group9",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::filter(!is.na(group))

## Plot distance
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(group == group1) )$distance
#   distance2 <- (data %>% dplyr::filter(group == group2) )$distance
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }


plot_TADdistance <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  # p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  # p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  # p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  # p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  # p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  # p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  # 
  p <- ggplot(temp.tb, aes(x = group, y = distance)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + scale_y_continuous(labels = label_kb_mb)
    # annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
    #                                                      "p15: ", convPvalue(p15), "\n",
    #                                                      "p18: ", convPvalue(p18), "\n",
    #                                                      "p25: ", convPvalue(p25), "\n",
    #                                                      "p28: ", convPvalue(p28), "\n",
    #                                                      "p58: ",convPvalue( p58), "\n"),
    #          color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("distance_to_boundary_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

plot_TADdistance(gene.TSS.tb.plot, "group_A485")


## Plot TAD size
# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(group == group1) )$TADsize
#   distance2 <- (data %>% dplyr::filter(group == group2) )$TADsize
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# 

plot_TADsize <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  # p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  # p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  # p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  # p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  # p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  # p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = TADsize)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + scale_y_continuous(labels = label_kb_mb) 
    # annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
    #                                                      "p15: ", convPvalue(p15), "\n",
    #                                                      "p18: ", convPvalue(p18), "\n",
    #                                                      "p25: ", convPvalue(p25), "\n",
    #                                                      "p28: ", convPvalue(p28), "\n",
    #                                                      "p58: ",convPvalue( p58), "\n"),
    #          color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("size_of_boundary_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_TADsize(gene.TSS.tb.plot, "group_A485", ymax = 6e6)
```

## [2.16] Checking the intensity of RAD21 at TAD boundary
#### Calculating RAD21 intensity of the nearest TAD boundary
```{r}
refDir <- here("../..", "reference")

# Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               start = start -175000/2,
                                               end = end + 175000/2,
                                               center = (start + end)/2)

# Importing RAD21 bigwig track
bw.RAD21 <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm_bin50bp.bw"))

# Create GRanges object for TAD boundaries
tad_ranges <- makeGRangesFromDataFrame(tad_boundary, keep.extra.columns = TRUE)

# Find overlaps between all TAD boundaries and RAD21 bigwig data
overlaps <- findOverlaps(tad_ranges, bw.RAD21)

# Extract overlapping regions and scores from the BigWig
overlapping_bw <- bw.RAD21[subjectHits(overlaps)]
overlapping_tads <- tad_ranges[queryHits(overlaps)]

# Aggregate scores by TAD boundary regions


scores <- as_tibble(overlapping_bw) %>%
  dplyr::mutate(tad_id = tad_ranges[queryHits(overlaps)]$tad_id) %>%
  group_by(tad_id) %>%
  summarise(rad21Score = sum(score))

tad_boundary <- tad_boundary %>%
  left_join(scores, by = "tad_id")





# Importing gene
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")


# Find nearest TAD boundary
findClosestTADBoundaryID <- function(chrom, TSS, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom) %>%
    dplyr::mutate(distance = abs(center - TSS)) %>%
    slice_min(distance)
  if(nrow(temp) == 1){
      return(temp$tad_id)
  }else{
    return(NA)
  }
}
gene.TSS.tb <- gene.TSS.tb %>% rowwise() %>%
  dplyr::mutate(closestBoundary = findClosestTADBoundaryID(chr, TSS, tad_boundary))

temp <- tad_boundary %>% dplyr::select(tad_id, rad21Score)

gene.TSS.tb <- gene.TSS.tb %>% left_join(temp, by = c("closestBoundary" = "tad_id") )

```


##### Plotting for groups
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))


## Plot distance
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$rad21Score
  distance2 <- (data %>% dplyr::filter(group == group2) )$rad21Score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_rad21ScoreAtBoundary <- function(temp.tb, note){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = rad21Score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + 
    annotate("text", x = 1, y = 20 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("rad21Score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

plot_rad21ScoreAtBoundary(gene.TSS.tb.plot, "group")


```


## [2.17] Checking the density of gene / enhancer within designated TAD
#### Making TAD gene count
```{r}
### IMPORTING REQUIRED DATA
# Importing TAD
tad <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_G1DMSO.bedpe"))
colnames(tad) <- c("chr1", "start1", "end1", "chr2", "start2", "end2")
tad <- tad %>% dplyr::mutate(tadID = paste(chr1, start1, end1, sep = "_"))


# Importing gene
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")

# Importing enhancer
peak.H3K27ac <- as_tibble(importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))) %>%
  dplyr::mutate(center = (start + end)/2)


### FUNCTION FOR COUNTING
findItsTAD <- function(chrom, coordinate, tad){
  temp <- tad %>% dplyr::filter(chr1 == chrom,
                                         start1 < coordinate,
                                         end1 > coordinate)
  if(nrow(temp) == 1){
    return(temp$tadID)
  }else{
    return(NA)
  }
}

### Counting each features for TAD
### Counting gene
geneCountPerTAD <- gene.TSS.tb %>% rowwise() %>%
  dplyr::mutate(TAD = findItsTAD(chr, TSS, tad)) %>%
  drop_na() %>%
  group_by(TAD) %>%
  summarize(count = n())

### Counting enhancer

enhCountPerTAD <- peak.H3K27ac %>% rowwise() %>%
  dplyr::mutate(TAD = findItsTAD(seqnames, center, tad)) %>%
  drop_na() %>%
  group_by(TAD) %>%
  summarize(count = n())

### Get TAD sizez
tad.db <- tad %>% left_join(geneCountPerTAD, by = c("tadID" = "TAD"), ) %>%
  left_join(enhCountPerTAD, by = c("tadID" = "TAD")) %>%
  dplyr::select(c(1, 2, 3, 7, 8, 9)) %>%
  mutate_all(~replace(., is.na(.), 0))

colnames(tad.db) <- c("chr", "start", "end", "tadID", "geneCount", "enhCount")

tad.db <- tad.db %>% dplyr::mutate(
  tadSize = end - start,
  geneDensity = geneCount/tadSize * 100e3,
  enhDensity = enhCount/tadSize* 100e3,
  regDensity = (geneCount + enhCount)/tadSize* 100e3
)



### Assign TAD and information to gene
gene.TSS.tb <- gene.TSS.tb %>% rowwise() %>%
  dplyr::mutate(TAD = findItsTAD(chr, TSS, tad))
#gene.TSS.tb <- gene.TSS.tb %>% dplyr::filter(!is.na(TAD))
temp <- tad.db %>% dplyr::select(-c(1, 2, 3))

gene.TSS.tb <- gene.TSS.tb %>% dplyr::left_join(temp, by = c("TAD" = "tadID"))
```

#### Plotting for groups
##### geneCount
###### Binary group
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2", NA))) %>% 
  dplyr::filter(!is.na(group))


## geneCount
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$geneCount
  distance2 <- (data %>% dplyr::filter(group == group2) )$geneCount
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_geneCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = geneCount)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("geneCount_", note)
  height <- 3
  width <- 1.5
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_geneCount(gene.TSS.tb.plot, "group_binaryGroup", ymax = 200)

```
###### Group
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group))


## geneCount
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$geneCount
  distance2 <- (data %>% dplyr::filter(group == group2) )$geneCount
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_geneCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = geneCount)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("geneCount_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_geneCount(gene.TSS.tb.plot, "group", ymax = 200)

```
##### enhCount
###### Binary Group
```{r}
## enhCount
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$enhCount
  distance2 <- (data %>% dplyr::filter(group == group2) )$enhCount
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_enhCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = enhCount)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("enhCount_", note)
  height <- 3
  width <- 1.5
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_enhCount(gene.TSS.tb.plot, "group_binaryGroup", ymax = 200)

```
###### Group
```{r}
## enhCount
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$enhCount
  distance2 <- (data %>% dplyr::filter(group == group2) )$enhCount
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_enhCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = enhCount)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("enhCount_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_enhCount(gene.TSS.tb.plot, "group", ymax = 200)

```
##### geneDensity
###### binaryGroup
```{r}
## geneDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$geneDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$geneDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_geneDensity <- function(temp.tb, note, ymin = 0, ymax = 10){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = ".", fill = group, y = geneDensity)) +
    # Set axis labels (no x-axis title)
    labs(x = NULL, y = "Gene density within TAD") +
    scale_fill_manual(values = (rev(c("#777777","#F28E2C")))) +
    # Violin plot with black outline, customized line width and end
    introdataviz::geom_split_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square",
                                    alpha = .4
    ) +
    
    # Box plot with customized line width, square end, and outlier size
    geom_boxplot(
      width = 0.3, color = "black",
      linewidth = lineMedium * mmToLineUnit, lineend = "square",
      outlier.shape = NA,  alpha = 0.6, show.legend = FALSE
    ) +
    
    # Mean point in each group
    stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) +
    
    # Horizontal line at y = 0
    geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
    
    # Coordinate limits and custom y-axis labels
    coord_cartesian(ylim = c(ymin, ymax)) +
    
    # Annotate p-value text
    annotate(
      "text", x = 1, y = ymin + 1,
      label = paste0("p12: ", convPvalue(p12)),
      color = "black", hjust = 0, size = 3
    ) +
    
    # Theme customization
    theme_classic() +
    theme(
      axis.title = element_text(
        size = fontSizeM, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 45, hjust = 1, vjust = 1
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)
    )+   
    guides(
      fill = guide_legend(
        keywidth = 0.2,  # Adjust the width of the legend keys
        keyheight = 0.2  # Adjust the height of the legend keys
      ))
  
  
  
  
  
  fileName <- paste0("geneDensity_", note)
  width <- panelSize(1.5)*mmToInch
  height <- panelSize(1.2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_geneDensity(gene.TSS.tb.plot, "group_binaryGroup", ymax = 8)

```
###### Group
```{r}
## geneDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$geneDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$geneDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_geneDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = geneDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("geneDensity_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_geneDensity(gene.TSS.tb.plot, "group", ymax = 10)

```
##### enhDensity
###### Binary group
```{r}
## enhDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$enhDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$enhDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_enhDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = enhDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("enhDensity_", note)
  height <- 3
  width <- 1.5
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_enhDensity(gene.TSS.tb.plot, "group_binaryGroup", ymax = 10)

```
###### Group
```{r}
## enhDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$enhDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$enhDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_enhDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = enhDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("enhDensity_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_enhDensity(gene.TSS.tb.plot, "group", ymax = 10)

```
##### regdensity
```{r}
## regDensity
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$regDensity
  distance2 <- (data %>% dplyr::filter(group == group2) )$regDensity
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


plot_regDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
  p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
  p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
  p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
  p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
  p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
  p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
  
  p <- ggplot(temp.tb, aes(x = group, y = regDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() +
    annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("regDensity_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}
plot_regDensity(gene.TSS.tb.plot, "group", ymax = 10)

```
#### Checking TAD average log2FC
```{r}
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj)

temp <- left_join(gene.TSS.tb, diff.RNA, by = c("ensembl" = "ensembl_gene_id")) %>% dplyr::filter(!is.na(log2FoldChange),
                                                                                                  !is.na(TAD))


data <- temp %>% group_by(TAD) %>%
  dplyr::summarise(absAvgLog2FC = abs(mean(log2FoldChange)),
                   avgLog2FC = mean(log2FoldChange),
                   geneDensity = mean(geneDensity),
                   enhDensity = mean(enhDensity),
                   regDensity = mean(regDensity))

data$geneDensityGroup <- cut(
  data$geneDensity,
  breaks = quantile(data$geneDensity, probs = seq(0, 1, 0.2), na.rm = TRUE),
  include.lowest = TRUE,
  labels = paste0(seq(0, 80, 20), "-", seq(20, 100, 20), "%")
)

data$enhDensityGroup <- cut(
  data$enhDensity,
  breaks = quantile(data$enhDensity, probs = seq(0, 1, 0.2), na.rm = TRUE),
  include.lowest = TRUE,
  labels = paste0(seq(0, 80, 20), "-", seq(20, 100, 20), "%")
)


### Ploting  grouping

p <- ggplot(data, aes(x = enhDensityGroup, y = enhDensity, fill = enhDensityGroup)) + 
  geom_boxplot(color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  coord_cartesian(ylim = c(0, quantile(data$enhDensity, 0.99))) +
  labs(x = "Enhancer Density Group" , y = "TAD enhancer density") +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    legend.position = "none",
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- paste0("tadGroup_enhDensity")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()




p <- ggplot(data, aes(x = geneDensityGroup, y = geneDensity, fill = geneDensityGroup)) + 
  geom_boxplot(color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  coord_cartesian(ylim = c(0, quantile(data$geneDensity, 0.99))) +
  labs(x = "Gene Density Group" , y = "Gene density") +
  scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
  theme(
    legend.position = "none",
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )
fileName <- paste0("tadGroup_geneDensity")
width <- 31*mmToInch
height <- 38*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


### Plotting distribution - enh
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(enhDensityGroup ==group1) )$avgLog2FC
  distance2 <- (data %>% dplyr::filter(enhDensityGroup ==group2) )$avgLog2FC
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


#### ALL
# ylim <- max(abs(quantile(data$avgLog2FC, 0.01)), abs(quantile(data$avgLog2FC, 0.99)))
# p <- ggplot(data, aes(x = enhDensityGroup, y = avgLog2FC)) + geom_violin(aes(fill = enhDensityGroup))+ 
#   geom_boxplot(outlier.shape = NA, width = 0.1) + theme_classic() +
#   coord_cartesian(ylim = c(-ylim, ylim)) +
#   geom_hline(yintercept = 0) +   theme(legend.position = "none") +
#   scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
# 
# fileName <- paste0("tadGroup_enhDensity_avgLog2FC")
# height <- 3
# width <- 3
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()
#### UP
data.plus <- data %>% dplyr::filter(avgLog2FC > 0)
ps01 <- round(getPvalWilcox(data.plus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.plus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.plus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.plus, "60-80%", "80-100%"), 5)



p <- ggplot(data.plus, aes(x = enhDensityGroup, y = avgLog2FC)) + 
  geom_violin(aes(fill = enhDensityGroup),
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE)+ 
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  labs(x = "Enhancer density group" , y = "avg log2(fold change)") +
  stat_summary(
    aes(group = enhDensityGroup), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+ coord_cartesian(ylim = c(0, 1.5)) +
  annotate("text", x = 1, y = 1, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n"), 
           color = "black", hjust = 0, size = 2)


fileName <- paste0("tadGroup_enhDensity_avgLog2FC_up")
width <- panelSize(1.15)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


#### DOWN
data.minus <- data %>% dplyr::filter(avgLog2FC < 0)
ps01 <- round(getPvalWilcox(data.minus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.minus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.minus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.minus, "60-80%", "80-100%"), 5)


p <- ggplot(data.minus, aes(x = enhDensityGroup, y = avgLog2FC)) + 
  geom_violin(aes(fill = enhDensityGroup),
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE)+ 
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  labs(x = "Enhancer density group" , y = "avg log2(fold change)") +
  stat_summary(
    aes(group = enhDensityGroup), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+ coord_cartesian(ylim = c(-1.5, 0))+
  annotate("text", x = 1, y = -1, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n"), 
           color = "black", hjust = 0, size = 2)

fileName <- paste0("tadGroup_enhDensity_avgLog2FC_down")
width <- panelSize(1.175)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



### Plotting distribution - gene
#### ALL
# ylim <- max(abs(quantile(data$avgLog2FC, 0.01)), abs(quantile(data$avgLog2FC, 0.99)))
# p <- ggplot(data, aes(x = geneDensityGroup, y = avgLog2FC)) + geom_violin(aes(fill = geneDensityGroup))+ 
#   geom_boxplot(outlier.shape = NA, width = 0.1) + theme_classic() +
#   coord_cartesian(ylim = c(-ylim, ylim)) +
#   geom_hline(yintercept = 0) +   theme(legend.position = "none") +
#   scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
# 
# fileName <- paste0("tadGroup_geneDensity_avgLog2FC")
# height <- 3
# width <- 3
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()
 getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(geneDensityGroup ==group1) )$avgLog2FC
  distance2 <- (data %>% dplyr::filter(geneDensityGroup ==group2) )$avgLog2FC
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


data.plus <- data %>% dplyr::filter(avgLog2FC > 0)
ps01 <- round(getPvalWilcox(data.plus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.plus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.plus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.plus, "60-80%", "80-100%"), 5)



p <- ggplot(data.plus, aes(x = geneDensityGroup, y = avgLog2FC)) + 
  geom_violin(aes(fill = geneDensityGroup),
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE)+ 
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  labs(x = "Gene density group" , y = "avg log2(fold change)") +
  stat_summary(
    aes(group = geneDensityGroup), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+ coord_cartesian(ylim = c(0, 1.5)) +
  annotate("text", x = 1, y = 1, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n"), 
           color = "black", hjust = 0, size = 2)


fileName <- paste0("tadGroup_geneDensity_avgLog2FC_up")
width <- panelSize(1.15)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


#### DOWN
data.minus <- data %>% dplyr::filter(avgLog2FC < 0)
ps01 <- round(getPvalWilcox(data.minus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.minus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.minus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.minus, "60-80%", "80-100%"), 5)


p <- ggplot(data.minus, aes(x = geneDensityGroup, y = avgLog2FC)) + 
  geom_violin(aes(fill = geneDensityGroup),
              color = "black",
              linewidth = lineThick * mmToLineUnit, lineend = "square",
              show.legend = FALSE)+ 
  geom_boxplot(width = 0.1, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  labs(x = "Gene density group" , y = "avg log2(fold change)") +
  stat_summary(
    aes(group = geneDensityGroup), fun = mean,
    geom = "point", shape = 21, size = 1,
    fill = "red", color = "black"
  ) +
  scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
  theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+ coord_cartesian(ylim = c(-1.5, 0))+
  annotate("text", x = 1, y = -1, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n"), 
           color = "black", hjust = 0, size = 2)

fileName <- paste0("tadGroup_geneDensity_avgLog2FC_down")
width <- panelSize(1.175)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


### CDF plot

ks_result <- ks.test(
  data %>% dplyr::filter(geneDensityGroup == "0-20%") %>% pull(absAvgLog2FC),
  data %>% dplyr::filter(geneDensityGroup == "20-40%") %>% pull(absAvgLog2FC)
)


p <- ggplot(data, aes(x = absAvgLog2FC, color = geneDensityGroup)) +
  scale_color_manual(values = rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS),
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))


fileName <- paste0("tadGroup_geneDensity_avgLog2FC_cdf")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

data <- data.minus

group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "0-20%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "20-40%"]
ks.test(group1_data, group2_data)

group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "20-40%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "40-60%"]
ks.test(group1_data, group2_data)

group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "40-60%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "60-80%"]
ks.test(group1_data, group2_data)

group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "60-80%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "80-100%"]
ks.test(group1_data, group2_data)


```


## [2.18] Comparing TAD insulation score across samples
Checking how insulation score changes by treatment at DMSO TAD boundaries
```{r}
name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import insulation score calculated with python
resultDir <- here("../../result")
insScore.DMSO <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1DMSO.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.DMSO) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.dTAG <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1dTAG.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.dTAG) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.A485 <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1A485.tsv")) %>% 
  dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
  dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.A485) <- c("chrom", "start", "end", "insulationScore", "binID")

getInsulationScore <- function(chr, coordinate, insScore.tb){
  temp.tb <- insScore.tb %>% dplyr::filter(chrom == chr, start < coordinate, end > coordinate)
  out <- temp.tb$insulationScore
  if(length(out) < 1){
    return(NA)
  }else{
    return(temp.tb$insulationScore)
  }
}

# Importing TAD boundaries
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% rowwise() %>%
  dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                center = (start + end)/2,
                insScore_DMSO = getInsulationScore(chr, center, insScore.DMSO),
                insScore_dTAG = getInsulationScore(chr, center, insScore.dTAG),
                insScore_A485 = getInsulationScore(chr, center, insScore.A485))

tad_boundary <- tad_boundary %>% filter(!if_any(everything(), is.na))

tad_boundary$density <- get_density(tad_boundary$insScore_DMSO, tad_boundary$insScore_dTAG, n = 100)
tad_boundary <- tad_boundary %>% dplyr::arrange(density)
ggplot(tad_boundary, aes(x = insScore_DMSO, y = insScore_dTAG, color = density)) + 
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() + coord_fixed() +
  geom_abline(slope = 1, intercept = 0) + theme_bw()



tad_boundary$density <- get_density(tad_boundary$insScore_DMSO, tad_boundary$insScore_A485, n = 100)
tad_boundary <- tad_boundary %>% dplyr::arrange(density)
ggplot(tad_boundary, aes(x = insScore_DMSO, y = insScore_A485, color = density)) + 
  geom_point(show.legend = FALSE) + 
  scale_color_viridis() + coord_fixed() +
  geom_abline(slope = 1, intercept = 0) + theme_bw()




```


## [2.20] Checking how many loops cross the boundary?
Previously, I tried to check the percentage of loops crossing the boundary per group which wasn't fruitful.
This time, try to do this on the differential loops.
#### dTAG
```{r}
### Importing differential regulatory loops

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "UP/NO")
colnames(loop.up) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "UP/NO")
colnames(loop.no) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "DOWN")
colnames(loop.down) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")

loops <- bind_rows(bind_rows(loop.up, loop.no), loop.down)

### Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               start = start -175000/2,
                                               end = end + 175000/2,
                                               center = (start + end)/2)
### Functions
checkBoundaryCross <- function(chrom1, start1, end2, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom1,
                                 center > start1,
                                 center < end2)
  return(length(temp$center))
}

### Check overlap
loops <- loops %>% rowwise() %>%
  dplyr::mutate(boundaryCross = checkBoundaryCross(chr1, start1, end2, tad_boundary))

### Plotting
summary_data <- loops %>%
  group_by(group) %>%
  summarise(percentage = mean(boundaryCross > 0) * 100)

summary_data$group <- factor(summary_data$group, levels = c("UP/NO", "DOWN"))

p <- ggplot(summary_data, aes(x = group, y = percentage)) +
  geom_bar(stat = "identity", fill = darken(strong_green, amount = 0.2)) +
  labs(y = "% of loops crossing TAD boundary", x = NULL) +
  theme_classic() + ylim(0, 100) +
  theme(
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
  )

fileName <- paste0("boundaryCrossRatio")
width <- panelSize(1)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

#### A485
```{r}
### Importing differential regulatory loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "up")
colnames(loop.up) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "no")
colnames(loop.no) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(group = "down")
colnames(loop.down) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")

loops <- bind_rows(bind_rows(loop.up, loop.no), loop.down)

### Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
                                               start = start -175000/2,
                                               end = end + 175000/2,
                                               center = (start + end)/2)
### Functions
checkBoundaryCross <- function(chrom1, start1, end2, tad_boundary){
  temp <- tad_boundary %>% dplyr::filter(chr == chrom1,
                                 center > start1,
                                 center < end2)
  return(length(temp$center))
}

### Check overlap
loops <- loops %>% rowwise() %>%
  dplyr::mutate(boundaryCross = checkBoundaryCross(chr1, start1, end2, tad_boundary))

### Plotting
summary_data <- loops %>%
  group_by(group) %>%
  summarise(percentage = mean(boundaryCross > 0) * 100)


p <- ggplot(summary_data, aes(x = group, y = percentage)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Percentage of Rows with boundaryCross > 0 per Group",
       x = "Group",
       y = "Percentage") +
  theme_classic() + ylim(0, 100)

fileName <- paste0("boundaryCrossRatio_A485")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


## [2.21] LOLA on differential loop anchors
##### LOADING LOLA
```{r}
# LOADING LOLA DB
library("simpleCache")
library("LOLA")
lolaDB = loadRegionDB("/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/reference/LOLACore_cached/mm10")

# FUNCTIONS
extractAnchor <- function(loop){
  anchor1 <- loop %>% dplyr::select(c(1, 2, 3))
  colnames(anchor1) <- c("chr", "start", "end")
  anchor2 <- loop %>% dplyr::select(c(4, 5, 6))
  colnames(anchor2) <- c("chr", "start", "end")
  anchors <- reduce(makeGRangesFromDataFrame(bind_rows(anchor1, anchor2)))
    return(anchors)
}

```
#### dTAG, Comparing to Async
##### - Limited to ATAC-seq signal, reg loop backgroupd
```{r}
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])



loop.1 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothRetained.bedpe"))
anchor.1 <- (extractAnchor(loop.1))
overlaps <- findOverlaps(anchor.1, atac.gr)
anchor.1 <- pintersect(anchor.1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.2 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_AsyncSpecificPert.bedpe"))
anchor.2 <- (extractAnchor(loop.2))
overlaps <- findOverlaps(anchor.2, atac.gr)
anchor.2 <- pintersect(anchor.2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.3 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_G1SpecificPert.bedpe"))
anchor.3 <- (extractAnchor(loop.3))
overlaps <- findOverlaps(anchor.3, atac.gr)
anchor.3 <- pintersect(anchor.3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.4 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothPert.bedpe"))
anchor.4 <- (extractAnchor(loop.4))
overlaps <- findOverlaps(anchor.4, atac.gr)
anchor.4 <- pintersect(anchor.4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


anchors <- list(anchor.1, anchor.2, anchor.3, anchor.4)
tbs <- list()
temps <- list()

# Process clusters c1 to c8
for (i in 1:4) {
  
  anchor <- anchors[[i]]
  # Run LOLA
  result <- runLOLA(anchor, anchor.all, lolaDB)
  tb <- as_tibble(result)
  
  # Filter and summarize
  tb <- tb %>%
    dplyr::mutate(target = toupper(antibody)) %>%
    filter(str_to_lower(cellType) == "embryonic stem cell") %>%
    dplyr::filter(qValue < alpha) %>%
    dplyr::group_by(target) %>%
    slice_min(meanRnk, with_ties = FALSE)
  
  # Store tb
  tbs[[i]] <- tb
  
  # Select and rename oddsRatio
  temp <- tb %>% dplyr::select(target, oddsRatio)
  colnames(temp) <- c("target", paste0("OR_c", i))
  
  # Store temp
  temps[[i]] <- temp
}

# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
  mutate_all(~replace_na(., 1))
colnames(temp) <- c("target", "bothRetained", "AsyncSpecificPerturb", "G1SpecificPerturb", "bothPerturb")
data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)



```

#### dTAG
##### - Limiting to ATAC-seq signal, reg loop background
```{r}
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
overlaps <- findOverlaps(anchor.upno, atac.gr)
anchor.upno <- pintersect(anchor.upno[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])



```


```{r}
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv"), sep = "\t")

# UPNO
result = runLOLA(anchor.upno, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv"), sep = "\t")


### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

# p <- Heatmap(
#   data,
#   name = "Odds Ratio",                   # Name of the heatmap legend
#   cluster_columns = FALSE,            # Remove column dendrogram
#   row_km = 4,                         # Define the number of k-means clusters for rows (adjust as needed)
#   show_row_dend = FALSE,
#   col = col_fun                       # Use the red gradient color scale
# )
# 
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_regAnchorBackground_atac")
# height <- 7
# width <- 3.5
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()
# 


### Visualizing p-value and OR
alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "UP") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "NO") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)

tb.upno <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log10(qValue),
                group = "UP/NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.upno %>% dplyr::select(c(24, 16, 20,
                                   4, 25, 5,
                                   7, 8, 9, 10, 11,
                                   12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac_pub.tsv"), sep = "\t")

tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log10(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.down %>% dplyr::select(c(24, 16, 20,
                                   4, 25, 5,
                                   7, 8, 9, 10, 11,
                                   12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac_pub.tsv"), sep = "\t")


temp.upno <- tb.upno %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.upno, temp.down)

# order <- c((temp %>% dplyr::filter(group == "UP/NO") %>% arrange(desc(oddsRatio)))$target, 
#            (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)

# temp$target <- factor(temp$target, levels = rev(order))
temp$group <- factor(temp$group, levels = c("UP/NO", "DOWN"))

targetList <- c("POLR2A", "CTR9",
                "AFF4", "ELL2",
                "MED1", "MED12",
                "TBP", "TAF1", "TAF3",
                "E2F1", "YY1", "NIPBL", 
                "EP300", "DPY30", "SETDB1",
                "RAD21", "SMC1A", "SMC3", "CTCF",
                "SUZ12", "PHF19"
)
temp <- temp %>% dplyr::filter(target %in% targetList)
temp$target <- factor(temp$target, levels = rev(targetList))


qValueLogMax <- 50
temp2 <- temp %>% dplyr::mutate(qValueLog = min(qValueLog, qValueLogMax))

p <- ggplot(temp2, aes(x = group, y = target, fill = oddsRatio, size = qValueLog)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )
# p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
#   geom_point() + theme_bw() + 
#   scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
#   scale_color_gradient(low = "blue", high = "red",
#                        guide = guide_colorbar(
#                          barwidth = 1.5/5.08,  # Adjust width of the color bar
#                          barheight = 15/5.08    # Adjust height of the color bar
#                        )) +
#   labs(x = NULL, y = NULL)  +
#   theme(
#     axis.title = element_text(
#       size = fontSizeS,
#       family = fontType,
#       color = "#000000"
#     ),
#     axis.text = element_text(
#       size = fontSizeS,
#       family = fontType,
#       color = "#000000"
#     ),
#     axis.text.x = element_text(
#       angle = 45,      # Rotate x-axis labels 45 degrees
#       hjust = 1,       # Adjust horizontal justification
#       vjust = 1        # Adjust vertical justification
#     ),
#     axis.line = element_line(
#       color = "#000000",
#       size = lineThick*mmToLineUnit,
#       lineend = "square"
#     ),
#     axis.ticks = element_line(
#       color = "#000000",
#       size = lineThick*mmToLineUnit,
#       lineend = "square"
#     ),
#     panel.background = element_rect(fill = "transparent"),
#     legend.text = element_text(family = fontType, size = fontSizeS),
#     legend.title = element_text(family = fontType, size = fontSizeS)
#   )

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac_upno")
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.9)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


```{r}
### 241015 Testing on subset of loops
anchor.c1 <- extractAnchor(loop.cluster1)
overlaps <- findOverlaps(anchor.c1, atac.gr)
anchor.c1 <- pintersect(anchor.c1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c2 <- extractAnchor(loop.cluster2)
overlaps <- findOverlaps(anchor.c2, atac.gr)
anchor.c2 <- pintersect(anchor.c2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c3 <- extractAnchor(loop.cluster3)
overlaps <- findOverlaps(anchor.c3, atac.gr)
anchor.c3 <- pintersect(anchor.c3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c4 <- extractAnchor(loop.cluster4)
overlaps <- findOverlaps(anchor.c4, atac.gr)
anchor.c4 <- pintersect(anchor.c4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c5 <- extractAnchor(loop.cluster5)
overlaps <- findOverlaps(anchor.c5, atac.gr)
anchor.c5 <- pintersect(anchor.c5[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c6 <- extractAnchor(loop.cluster6)
overlaps <- findOverlaps(anchor.c6, atac.gr)
anchor.c6 <- pintersect(anchor.c6[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c7 <- extractAnchor(loop.cluster7)
overlaps <- findOverlaps(anchor.c7, atac.gr)
anchor.c7 <- pintersect(anchor.c7[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c8 <- extractAnchor(loop.cluster8)
overlaps <- findOverlaps(anchor.c8, atac.gr)
anchor.c8 <- pintersect(anchor.c8[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

result = runLOLA(anchor.c1, anchor.all, lolaDB)
tb.c1 = as_tibble(result)

result = runLOLA(anchor.c2, anchor.all, lolaDB)
tb.c2 = as_tibble(result)

result = runLOLA(anchor.c3, anchor.all, lolaDB)
tb.c3 = as_tibble(result)

result = runLOLA(anchor.c4, anchor.all, lolaDB)
tb.c4 = as_tibble(result)
result = runLOLA(anchor.c5, anchor.all, lolaDB)
tb.c5 = as_tibble(result)

result = runLOLA(anchor.c6, anchor.all, lolaDB)
tb.c6 = as_tibble(result)

result = runLOLA(anchor.c7, anchor.all, lolaDB)
tb.c7 = as_tibble(result)

result = runLOLA(anchor.c8, anchor.all, lolaDB)
tb.c8 = as_tibble(result)

### HEATMAP
alpha <- 0.05
tb.c1 <- tb.c1 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c2 <- tb.c2 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c3 <- tb.c3 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c4 <- tb.c4 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c5 <- tb.c5 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c6 <- tb.c6 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c7 <- tb.c7 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c8 <- tb.c8 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.c1 <- tb.c1 %>% dplyr::select(target, oddsRatio)
colnames(temp.c1) <- c("target", "OR_c1")
temp.c2 <- tb.c2 %>% dplyr::select(target, oddsRatio)
colnames(temp.c2) <- c("target", "OR_c2")
temp.c3 <- tb.c3 %>% dplyr::select(target, oddsRatio)
colnames(temp.c3) <- c("target", "OR_c3")
temp.c4 <- tb.c4 %>% dplyr::select(target, oddsRatio)
colnames(temp.c4) <- c("target", "OR_c4")
temp.c5 <- tb.c5 %>% dplyr::select(target, oddsRatio)
colnames(temp.c5) <- c("target", "OR_c5")
temp.c6 <- tb.c6 %>% dplyr::select(target, oddsRatio)
colnames(temp.c6) <- c("target", "OR_c6")
temp.c7 <- tb.c7 %>% dplyr::select(target, oddsRatio)
colnames(temp.c7) <- c("target", "OR_c7")
temp.c8 <- tb.c8 %>% dplyr::select(target, oddsRatio)
colnames(temp.c8) <- c("target", "OR_c8")

temp <- full_join(temp.c1, temp.c2, by = "target") %>%
  full_join(temp.c3, by = "target") %>%
  full_join(temp.c4, by = "target") %>%
  full_join(temp.c5, by = "target") %>%
  full_join(temp.c6, by = "target") %>%
  full_join(temp.c7, by = "target") %>%
  full_join(temp.c8, by = "target") %>%
  mutate_all(~replace_na(., 1))

data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)


# Initialize lists to store results
anchors <- list()
tbs <- list()
temps <- list()

# Process clusters c1 to c8
for (i in 1:4) {
  # Extract anchor
  loop_cluster <- get(paste0("loop.cluster", i))
  anchor <- extractAnchor(loop_cluster)
  
  # Find overlaps and intersect
  overlaps <- findOverlaps(anchor, atac.gr)
  anchor <- pintersect(anchor[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
  
  # Store anchor
  anchors[[i]] <- anchor
  
  # Run LOLA
  result <- runLOLA(anchor, anchor.all, lolaDB)
  tb <- as_tibble(result)
  
  # Filter and summarize
  tb <- tb %>%
    dplyr::mutate(target = toupper(antibody)) %>%
    filter(str_to_lower(cellType) == "embryonic stem cell") %>%
    dplyr::filter(qValue < alpha) %>%
    dplyr::group_by(target) %>%
    slice_min(meanRnk, with_ties = FALSE)
  
  # Store tb
  tbs[[i]] <- tb
  
  # Select and rename oddsRatio
  temp <- tb %>% dplyr::select(target, oddsRatio)
  colnames(temp) <- c("target", paste0("OR_c", i))
  
  # Store temp
  temps[[i]] <- temp
}

# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
  mutate_all(~replace_na(., 1))
data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)
```



##### - Limited to ATAC-seq signal, all loop backgroup
```{r}
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
overlaps <- findOverlaps(anchor.upno, atac.gr)
anchor.upno <- pintersect(anchor.upno[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loopNum <- nrow(loop.upno)

# Selecting same number of extreme down loops
loop.down <- loop.all %>% dplyr::filter(diff_dTAG_DMSO < -0.2) %>% dplyr::arrange(diff_dTAG_DMSO) %>% slice_head(n = loopNum)
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

```


```{r}
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv"), sep = "\t")

# UPNO
result = runLOLA(anchor.upno, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac.tsv"), sep = "\t")


# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv"), sep = "\t")



### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

# p <- Heatmap(
#   data,
#   name = "Odds Ratio",                   # Name of the heatmap legend
#   cluster_columns = FALSE,            # Remove column dendrogram
#   row_km = 10,                         # Define the number of k-means clusters for rows (adjust as needed)
#   show_row_dend = FALSE,
#   col = col_fun,
#   border = TRUE
# )
# 
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_allAnchorBackground_atac")
# height <- 7
# width <- 3.5
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()




### Visualizing p-value and OR
alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "UP") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "NO") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
tb.upno <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log10(qValue),
                group = "UP/NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.upno %>% dplyr::select(c(24, 16, 20,
                                   4, 25, 5,
                                   7, 8, 9, 10, 11,
                                   12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac_pub.tsv"), sep = "\t")
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log10(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.down %>% dplyr::select(c(24, 16, 20,
                                   4, 25, 5,
                                   7, 8, 9, 10, 11,
                                   12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac_pub.tsv"), sep = "\t")

# temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.upno <- tb.upno %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.upno, temp.down)
# 
# order <- c((temp %>% dplyr::filter(group == "UP/NO") %>% arrange(desc(oddsRatio)))$target, 
#            (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
# temp$target <- factor(temp$target, levels = rev(order))
temp$group <- factor(temp$group, levels = c("UP/NO", "DOWN"))


targetList <- c("POLR2A", "CTR9",
                "AFF4", "ELL2",
                "MED1", "MED12",
                "TBP", "TAF1", "TAF3",
                "ESRRB", "KLF4", "NANOG", "POU5F1", "SOX2", "STAT3", "E2F1", "YY1",
                "EP300", "DPY30", "EZH2", "KDM2B", "KDB4B", "KDM4C", "KDM6B", "RBBP5",
                "RAD21", "SMC1A", "SMC3", "CTCF",
                "JARID2", "SUZ12"
)
temp <- temp %>% dplyr::filter(target %in% targetList)

temp$target <- factor(temp$target, levels = rev(targetList))
# MAX qValueLog to 50
qValueLogMax <- 50
temp2 <- temp %>% dplyr::mutate(qValueLog = min(qValueLog, qValueLogMax))
p <- ggplot(temp2, aes(x = group, y = target, fill = oddsRatio, size = qValueLog)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 0.5*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(0.5, 2)) +  # Set min and max point sizes here
  scale_fill_gradient(low = "white", high = "#CB333A",
                      limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + 
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_OR_qValue_allAnchorBackground_atac_ordered")
width <- panelSize(1.5)*mmToInch
height <- panelSize(2.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()




```

### dTAG, obsexp

##### - Limited to ATAC-seq signal, all loop backgroup
```{r}
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_UP_diff0.5.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_NO_diff0.5.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_DOWN_diff0.5.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


```


```{r}
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv"), sep = "\t")



### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 4,                        # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_allLoops_allAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


############
# 
# ### Visualizing p-value and OR
# alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "UP") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "NO") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv")) %>%
#   dplyr::mutate(target = toupper(antibody),
#                 qValueLog = -log2(qValue),
#                 group = "DOWN") %>%
#   filter(str_to_lower(cellType) == "embryonic stem cell") %>%
#   dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
#   slice_min(meanRnk, with_ties = FALSE)
# 
# temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)
# 
# temp <- bind_rows(temp.up, temp.down)
# 
# order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target, 
#            (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
# temp$target <- factor(temp$target, levels = rev(order))
# p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
#   geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
#   labs(x = NULL, y = NULL)  +   scale_size_continuous(range = c(1, 3)) +
#   theme(axis.text = element_text(size = 6),  # Set axis text size
#         axis.title = element_text(size = 6), # Set axis title size (if not removed)
#         legend.text = element_text(size = 6), # Set legend text size
#         legend.title = element_text(size = 6)) 
# 
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_allLoops_OR_allAnchorBackground_atac")
# height <-3
# width <- 2
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
# print(p)
# dev.off()


```

##### - Limiting to ATAC-seq signal, reg loop background
```{r}
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_UP_diff0.5.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_NO_diff0.5.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_DOWN_diff0.5.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])



```


```{r}
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_pe-peLoops_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_pe-peLoops_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_pe-peLoops_atac.tsv"), sep = "\t")


### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_pe-peLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_pe-peLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_pe-peLoops_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 4,                         # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun                       # Use the red gradient color scale
)

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_pe-peLoops_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "UP") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.up, temp.down)

order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target, 
           (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
  geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
  labs(x = NULL, y = NULL)  +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()





### 241015 Testing on subset of loops
anchor.c1 <- extractAnchor(loop.cluster1)
overlaps <- findOverlaps(anchor.c1, atac.gr)
anchor.c1 <- pintersect(anchor.c1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c2 <- extractAnchor(loop.cluster2)
overlaps <- findOverlaps(anchor.c2, atac.gr)
anchor.c2 <- pintersect(anchor.c2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c3 <- extractAnchor(loop.cluster3)
overlaps <- findOverlaps(anchor.c3, atac.gr)
anchor.c3 <- pintersect(anchor.c3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c4 <- extractAnchor(loop.cluster4)
overlaps <- findOverlaps(anchor.c4, atac.gr)
anchor.c4 <- pintersect(anchor.c4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c5 <- extractAnchor(loop.cluster5)
overlaps <- findOverlaps(anchor.c5, atac.gr)
anchor.c5 <- pintersect(anchor.c5[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c6 <- extractAnchor(loop.cluster6)
overlaps <- findOverlaps(anchor.c6, atac.gr)
anchor.c6 <- pintersect(anchor.c6[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c7 <- extractAnchor(loop.cluster7)
overlaps <- findOverlaps(anchor.c7, atac.gr)
anchor.c7 <- pintersect(anchor.c7[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

anchor.c8 <- extractAnchor(loop.cluster8)
overlaps <- findOverlaps(anchor.c8, atac.gr)
anchor.c8 <- pintersect(anchor.c8[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

result = runLOLA(anchor.c1, anchor.all, lolaDB)
tb.c1 = as_tibble(result)

result = runLOLA(anchor.c2, anchor.all, lolaDB)
tb.c2 = as_tibble(result)

result = runLOLA(anchor.c3, anchor.all, lolaDB)
tb.c3 = as_tibble(result)

result = runLOLA(anchor.c4, anchor.all, lolaDB)
tb.c4 = as_tibble(result)
result = runLOLA(anchor.c5, anchor.all, lolaDB)
tb.c5 = as_tibble(result)

result = runLOLA(anchor.c6, anchor.all, lolaDB)
tb.c6 = as_tibble(result)

result = runLOLA(anchor.c7, anchor.all, lolaDB)
tb.c7 = as_tibble(result)

result = runLOLA(anchor.c8, anchor.all, lolaDB)
tb.c8 = as_tibble(result)

### HEATMAP
alpha <- 0.05
tb.c1 <- tb.c1 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c2 <- tb.c2 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c3 <- tb.c3 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c4 <- tb.c4 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.c5 <- tb.c5 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c6 <- tb.c6 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c7 <- tb.c7 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

tb.c8 <- tb.c8 %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>%
  dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.c1 <- tb.c1 %>% dplyr::select(target, oddsRatio)
colnames(temp.c1) <- c("target", "OR_c1")
temp.c2 <- tb.c2 %>% dplyr::select(target, oddsRatio)
colnames(temp.c2) <- c("target", "OR_c2")
temp.c3 <- tb.c3 %>% dplyr::select(target, oddsRatio)
colnames(temp.c3) <- c("target", "OR_c3")
temp.c4 <- tb.c4 %>% dplyr::select(target, oddsRatio)
colnames(temp.c4) <- c("target", "OR_c4")
temp.c5 <- tb.c5 %>% dplyr::select(target, oddsRatio)
colnames(temp.c5) <- c("target", "OR_c5")
temp.c6 <- tb.c6 %>% dplyr::select(target, oddsRatio)
colnames(temp.c6) <- c("target", "OR_c6")
temp.c7 <- tb.c7 %>% dplyr::select(target, oddsRatio)
colnames(temp.c7) <- c("target", "OR_c7")
temp.c8 <- tb.c8 %>% dplyr::select(target, oddsRatio)
colnames(temp.c8) <- c("target", "OR_c8")

temp <- full_join(temp.c1, temp.c2, by = "target") %>%
  full_join(temp.c3, by = "target") %>%
  full_join(temp.c4, by = "target") %>%
  full_join(temp.c5, by = "target") %>%
  full_join(temp.c6, by = "target") %>%
  full_join(temp.c7, by = "target") %>%
  full_join(temp.c8, by = "target") %>%
  mutate_all(~replace_na(., 1))

data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)


# Initialize lists to store results
anchors <- list()
tbs <- list()
temps <- list()

# Process clusters c1 to c8
for (i in 1:4) {
  # Extract anchor
  loop_cluster <- get(paste0("loop.cluster", i))
  anchor <- extractAnchor(loop_cluster)
  
  # Find overlaps and intersect
  overlaps <- findOverlaps(anchor, atac.gr)
  anchor <- pintersect(anchor[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
  
  # Store anchor
  anchors[[i]] <- anchor
  
  # Run LOLA
  result <- runLOLA(anchor, anchor.all, lolaDB)
  tb <- as_tibble(result)
  
  # Filter and summarize
  tb <- tb %>%
    dplyr::mutate(target = toupper(antibody)) %>%
    filter(str_to_lower(cellType) == "embryonic stem cell") %>%
    dplyr::filter(qValue < alpha) %>%
    dplyr::group_by(target) %>%
    slice_min(meanRnk, with_ties = FALSE)
  
  # Store tb
  tbs[[i]] <- tb
  
  # Select and rename oddsRatio
  temp <- tb %>% dplyr::select(target, oddsRatio)
  colnames(temp) <- c("target", paste0("OR_c", i))
  
  # Store temp
  temps[[i]] <- temp
}

# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
  mutate_all(~replace_na(., 1))
data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)
```


### A485
##### - Limited to ATAC-seq signal, all loop backgroupd
```{r}
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loopNum <- nrow(loop.down)

# Selecting same number of extreme no loops
loop.no <- loop.all %>% 
  dplyr::mutate(absDiff = abs(diff_A485_DMSO)) %>%
  dplyr::filter(absDiff < 0.2) %>%
  dplyr::arrange(absDiff) %>% slice_head(n = loopNum)
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
```

```{r}
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv"), sep = "\t")



### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 5,                         # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_allLoops_extreme_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()




### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "UP") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.up, temp.down)

order <- unique(c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target, 
           (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target))
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
  geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
  labs(x = NULL, y = NULL)  +   scale_size_continuous(range = c(1, 3)) +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_allLoops_extreme_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### - Limited to ATAC-seq signal, reg loop backgrouund 
```{r}
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])


```

```{r}
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)

# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv"), sep = "\t")

# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv"), sep = "\t")

# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv"), sep = "\t")



### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody)) %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")


temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))

data <- as.matrix(temp[2:4])
rownames(data) <- temp$target

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 4,                         # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun                       # Use the red gradient color scale
)

fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()


### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "UP") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "NO") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv")) %>%
  dplyr::mutate(target = toupper(antibody),
                qValueLog = -log2(qValue),
                group = "DOWN") %>%
  filter(str_to_lower(cellType) == "embryonic stem cell") %>%
  dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
  slice_min(meanRnk, with_ties = FALSE)

temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)

temp <- bind_rows(temp.up, temp.down)

order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target, 
           (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
  geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
  labs(x = NULL, y = NULL)  +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


## [2.22] Comparing absolute RNA expression level across group
The aim is to see if there is a trend in RNA expression level among group 1, 2, 3, 4.
It would be making comparison among genes. For this, TPM should be used.
```{r}
## Importing groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene

## Importing RNA-seq TPM for DMSO
tpm.selected <- fread(here(refDir, "readCount.filtered.TPM.all.tsv")) %>% dplyr::select(1, 3, 4, 5)
colnames(tpm.selected) <- c("ensembl", "rep1", "rep2", "rep3")

tpm.selected <- tpm.selected %>% dplyr::rowwise() %>%
  dplyr::mutate(
  group = ifelse(ensembl %in% group1, "group1",
                 ifelse(ensembl %in% group2, "group2",
                        ifelse(ensembl %in% group5, "group5",
                               ifelse(ensembl %in% group8, "group8", NA))))) %>% 
  dplyr::filter(!is.na(group)) %>%
  dplyr::mutate(avgTPM = mean(rep1, rep2, rep3))


ggplot(tpm.selected, aes(x = group, y = avgTPM)) +
  geom_violin(aes(fill = group), show.legend = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) + 
  scale_y_log10() +
  theme_classic()



  p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
    annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
                                                         "p15: ", convPvalue(p15), "\n",
                                                         "p18: ", convPvalue(p18), "\n",
                                                         "p25: ", convPvalue(p25), "\n",
                                                         "p28: ", convPvalue(p28), "\n",
                                                         "p58: ",convPvalue( p58), "\n"),
             color = "black", hjust = 0, size = 3)
  
  
  fileName <- paste0("insulation_score_", note)
  height <- 3
  width <- 3
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
```

## [2.23] Checking Early gene from Bobbie
#### dTAG
```{r}
## Importing groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene


## Importing Bobbie gene classification
geneCluster <- fread(here(refDir, "bobbie_gene_classification.csv")) %>%
  dplyr::select(gene, enst, Cluster)
colnames(geneCluster) <- c("gene", "ensembl_transcript_id", "cluster")


## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
                 filters = "ensembl_transcript_id",
                 values = geneCluster$ensembl_transcript,
                 mart = ensembl.v102)
geneCluster <- geneCluster %>% dplyr::left_join(idPair_tg, by = c("ensembl_transcript_id"))

# Making data for stacked barplot
countGene <- function(geneCluster, groupName, clusterName){
  num <- nrow(geneCluster %>% dplyr::filter(ensembl_gene_id %in% groupName,
                                            cluster %in% clusterName))
  return(num)
}
countGeneList <- function(geneCluster, groupName){
  n1 <- countGene(geneCluster, groupName, "Early")
  n2 <- countGene(geneCluster, groupName, "Middle")
  n3 <-countGene(geneCluster, groupName, "Late")
  n4 <- countGene(geneCluster, groupName, "Transient")
  return(c(n1, n2, n3, n4))
}

group <- c(rep("group1", 4), rep("group2", 4), rep("group3", 4), rep("group4", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 4)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, group1),
           countGeneList(geneCluster, group2),
           countGeneList(geneCluster, group5),
           countGeneList(geneCluster, group8))

data <- data.frame(group, cluster, value)


# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()


# Statistical analysis 
# Similar to the Chi-Square test, Fisher’s Exact Test is used when you have smaller sample sizes or when the expected frequency in any cell of the contingency table is below 5.
temp <- data %>% dplyr::filter(group %in% c("group1", "group2"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

temp <- data %>% dplyr::filter(group %in% c("group1", "group3"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

temp <- data %>% dplyr::filter(group %in% c("group1", "group4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result


### P-N
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene

group <- c(rep("pnOver8", 4), rep("pnOver6", 4), rep("pnOver4", 4), rep("pnOver2", 4), rep("pnOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, pnOver8),
           countGeneList(geneCluster, pnOver6),
           countGeneList(geneCluster, pnOver4),
           countGeneList(geneCluster, pnOver2),
           countGeneList(geneCluster, pnOver0))

data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

temp <- data %>% dplyr::filter(group %in% c("pnOver2", "pnOver8"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

### P-S
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene

group <- c(rep("psOver4", 4), rep("psOver3", 4), rep("psOver2", 4), rep("psOver1", 4), rep("psOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, psOver4),
           countGeneList(geneCluster, psOver3),
           countGeneList(geneCluster, psOver2),
           countGeneList(geneCluster, psOver1),
           countGeneList(geneCluster, psOver0))

data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()
temp <- data %>% dplyr::filter(group %in% c("psOver2", "psOver4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

### P-E
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene

group <- c(rep("peOver4", 4), rep("peOver3", 4), rep("peOver2", 4), rep("peOver1", 4), rep("peOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, peOver4),
           countGeneList(geneCluster, peOver3),
           countGeneList(geneCluster, peOver2),
           countGeneList(geneCluster, peOver1),
           countGeneList(geneCluster, peOver0))

data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

### P-P
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene

group <- c(rep("ppOver4", 4), rep("ppOver3", 4), rep("ppOver2", 4), rep("ppOver1", 4), rep("ppOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, ppOver4),
           countGeneList(geneCluster, ppOver3),
           countGeneList(geneCluster, ppOver2),
           countGeneList(geneCluster, ppOver1),
           countGeneList(geneCluster, ppOver0))
data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

```
#### A485
```{r}
## Importing groups
group1 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group2.tsv"))$gene
group3 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group3.tsv"))$gene
group4 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group4.tsv"))$gene
group5 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group5.tsv"))$gene
group6 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group6.tsv"))$gene
group7 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group7.tsv"))$gene
group8 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group8.tsv"))$gene
group9 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group9.tsv"))$gene


## Importing Bobbie gene classification
geneCluster <- fread(here(refDir, "bobbie_gene_classification.csv")) %>%
  dplyr::select(gene, enst, Cluster)
colnames(geneCluster) <- c("gene", "ensembl_transcript_id", "cluster")


## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
                 filters = "ensembl_transcript_id",
                 values = geneCluster$ensembl_transcript,
                 mart = ensembl.v102)
geneCluster <- geneCluster %>% dplyr::left_join(idPair_tg, by = c("ensembl_transcript_id"))

# Making data for stacked barplot
countGene <- function(geneCluster, groupName, clusterName){
  num <- nrow(geneCluster %>% dplyr::filter(ensembl_gene_id %in% groupName,
                                            cluster %in% clusterName))
  return(num)
}
countGeneList <- function(geneCluster, groupName){
  n1 <- countGene(geneCluster, groupName, "Early")
  n2 <- countGene(geneCluster, groupName, "Middle")
  n3 <-countGene(geneCluster, groupName, "Late")
  n4 <- countGene(geneCluster, groupName, "Transient")
  return(c(n1, n2, n3, n4))
}

group <- c(rep("group1", 4), rep("group2", 4), rep("group3", 4),
           rep("group4", 4), rep("group5", 4), rep("group6", 4),
           rep("group7", 4), rep("group8", 4), rep("group9", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 9)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, group1),
           countGeneList(geneCluster, group2),
           countGeneList(geneCluster, group3),
           countGeneList(geneCluster, group4),
           countGeneList(geneCluster, group5),
           countGeneList(geneCluster, group6),
           countGeneList(geneCluster, group7),
           countGeneList(geneCluster, group8),
           countGeneList(geneCluster, group9))

data <- data.frame(group, cluster, value)


# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()


# Statistical analysis 
# Similar to the Chi-Square test, Fisher’s Exact Test is used when you have smaller sample sizes or when the expected frequency in any cell of the contingency table is below 5.
temp <- data %>% dplyr::filter(group %in% c("group1", "group2"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

temp <- data %>% dplyr::filter(group %in% c("group1", "group3"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result

temp <- data %>% dplyr::filter(group %in% c("group1", "group4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result


```

## [2.24] Checking bookmarking
(1) Checking whether subset of regulatory loop have bookmarking
(2) Checking whether loops related to genes have bookmarking
H3K27ac_effie
ATAC_effie
TBP
Oct4_effie, Oct4_dsg
Sox2_effie, Sox2_dsg
Esrrb_dsg
Klf4_effie
CTCF
##### Functions
```{r}
extractAnchor <- function(loop){
  anchor1 <- loop %>% dplyr::select(c(1, 2, 3))
  colnames(anchor1) <- c("chr", "start", "end")
  anchor2 <- loop %>% dplyr::select(c(4, 5, 6))
  colnames(anchor2) <- c("chr", "start", "end")
  anchors <- reduce(makeGRangesFromDataFrame(bind_rows(anchor1, anchor2)))
    return(anchors)
}

runFisherExact <- function(interest.gr, background.gr, query.gr){
  overlaps_interest <- countOverlaps(interest.gr, query.gr)
  a <- sum(overlaps_interest > 0)
  total_interest <- length(interest.gr)
  c <- total_interest - a
  
  overlaps_background <- countOverlaps(background.gr, query.gr)
  b <- sum(overlaps_background > 0)
  total_background <- length(background.gr)
  d <- total_background - b
  
  # Construct contingency table
  contingency_table <- matrix(c(a, c, b, d), nrow=2, byrow=TRUE,
                              dimnames=list("Region" = c("interest.gr", "background.gr"),
                                            "Overlap" = c("Yes", "No")))
  
  # Perform Fisher's exact test
  fisher_result <- fisher.test(contingency_table)
  
  return(fisher_result)
}

runFisherExactCombination <- function(interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr, lost.gr){
  # SEEDING
  temp <- runFisherExact(loop.up.gr, background.gr, retained.gr)
  result.tb <- tibble(interest = paste0(interestName, "_UP"),
                      target = paste0(targetName, "_retained"),
                      pvalue = temp$p.value,
                      oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.no.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_NO"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.up.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_UP"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.no.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_NO"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  return(result.tb)
}

runFisherExactCombinationBinary <- function(interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr, lost.gr){
  # SEEDING
  temp <- runFisherExact(loop.upno.gr, background.gr, retained.gr)
  result.tb <- tibble(interest = paste0(interestName, "_UP/NO"),
                      target = paste0(targetName, "_retained"),
                      pvalue = temp$p.value,
                      oddsRatio = temp$estimate)
  

  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.upno.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_UP/NO"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)

  
  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_lost"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  return(result.tb)
}

runFisherExactCombinationRetained <- function(interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr){
  # SEEDING
  temp <- runFisherExact(loop.up.gr, background.gr, retained.gr)
  result.tb <- tibble(interest = paste0(interestName, "_UP"),
                      target = paste0(targetName, "_retained"),
                      pvalue = temp$p.value,
                      oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.no.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_NO"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  # ADDING ROWS
  temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
  result.tb <- result.tb %>% 
    add_row(interest = paste0(interestName, "_DOWN"),
            target = paste0(targetName, "_retained"),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  

  return(result.tb)
}

runFisherExactCombinationTarget <- function(targetName, interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr){
  # RETAINED loop
  retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(retained) <- c("chr", "start", "end")
  retained.gr <- makeGRangesFromDataFrame(retained)
  # LOST loop
  lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(lost) <- c("chr", "start", "end")
  lost.gr <- makeGRangesFromDataFrame(lost)
  
  result <- runFisherExactCombination(interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr, lost.gr)
  
  return(result)
}

runFisherExactCombinationTargetBinary <- function(targetName, interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr){
  # RETAINED loop
  retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(retained) <- c("chr", "start", "end")
  retained.gr <- makeGRangesFromDataFrame(retained)
  # LOST loop
  lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(lost) <- c("chr", "start", "end")
  lost.gr <- makeGRangesFromDataFrame(lost)
  
  result <- runFisherExactCombinationBinary(interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr, lost.gr)
  
  return(result)
}

runFisherExactCombinationTargetRetained <- function(targetName, interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr){
  # RETAINED loop
  retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
  colnames(retained) <- c("chr", "start", "end")
  retained.gr <- makeGRangesFromDataFrame(retained)
  # LOST loop
  result <- runFisherExactCombinationRetained(interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr,
                                      targetName,
                                      retained.gr)
  
  return(result)
}
```

##### All loops
```{r}
#### Importing loops of interest
interestName <- "allLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loopNum <- nrow(loop.no)
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
loop.down <- loop.all %>% dplyr::filter(diff_dTAG_DMSO < -0.2) %>% dplyr::arrange(diff_dTAG_DMSO) %>% slice_head(n = loopNum)
loop.down.gr <- (extractAnchor(loop.down))

#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("ESRRB_NCB", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("ESRRB_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("NANOG_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp11 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp12 <- runFisherExactCombinationTarget("OCT4_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp13 <- runFisherExactCombinationTarget("SMC1", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp14 <- runFisherExactCombinationTarget("SOX2_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp15 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp16 <- runFisherExactCombinationTarget("SOX2_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp17 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10,
         temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 5), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5), 
                                    labels = c("0", "1", "2", "3", "4", "5")))
```
##### All loops refined
```{r}


#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))

```


##### Reg loops
```{r}
#### Importing loops of interest
interestName <- "regLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# UPNO loop
loop.upno <- bind_rows(loop.up, loop.no)
loop.upno.gr <- extractAnchor(loop.upno)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))
```
###### Figure - H3K27ac only
```{r}
targetName <- "H3K27ac_effie"
# RETAINED loop
retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(retained) <- c("chr", "start", "end")
retained.gr <- makeGRangesFromDataFrame(retained)
# LOST loop
lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(lost) <- c("chr", "start", "end")
lost.gr <- makeGRangesFromDataFrame(lost)


# SEEDING
temp <- runFisherExact(loop.upno.gr, background.gr, retained.gr)
result.tb <- tibble(interest = paste0(interestName, "_UP/NO"),
                    target = paste0(targetName, "_retained"),
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)

# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
result.tb <- result.tb %>% 
  add_row(interest = paste0(interestName, "_DOWN"),
          target = paste0(targetName, "_retained"),
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

# ADDING ROWS
temp <- runFisherExact(loop.upno.gr, background.gr, lost.gr)
result.tb <- result.tb %>% 
  add_row(interest = paste0(interestName, "_UP/NO"),
          target = paste0(targetName, "_lost"),
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
result.tb <- result.tb %>% 
  add_row(interest = paste0(interestName, "_DOWN"),
          target = paste0(targetName, "_lost"),
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)


result.tb <- result.tb %>%
  mutate(interest = recode(interest, 
                           "regLoop_UP/NO" = "UP/NO",
                           "regLoop_DOWN" = "DOWN"),
         target = recode(target,
                         "H3K27ac_effie_retained" = "Retained",
                         "H3K27ac_effie_lost" = "Lost"))

result.tb$interest <- factor(result.tb$interest, levels = c("UP/NO", "DOWN"))

p <- ggplot(result.tb, aes(x = interest, y = target, size = -log10(pvalue), fill = oddsRatio)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 0.5*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
  scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"),  # Define gradient colors
                      values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5), 
                      #low = "white", high = "#CB333A",
                      #                  limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) +
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "heatmap_H3K27ac_bookmarking_reg_dotplot")
width <- panelSize(1.7)*mmToInch
height <- panelSize(1.1)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"),  height = height, width = width)
print(p)
dev.off()

  
```
###### Figure - all targets
```{r}
#### Importing loops of target

temp1 <- runFisherExactCombinationTargetBinary("ESRRB_dsg", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTargetBinary("H3K27ac_effie", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTargetBinary("KLF4_effie", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTargetBinary("OCT4_effie", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTargetBinary("SOX2_effie", interestName,
                                      loop.upno.gr, loop.down.gr,
                                      background.gr)


data <- bind_rows(temp1, temp2, temp3, temp4, temp5)



result.tb <- data %>%
  mutate(interest = recode(interest, 
                           "regLoop_UP/NO" = "UP/NO",
                           "regLoop_DOWN" = "DOWN"),
         target = recode(target,
                         "H3K27ac_effie_retained" = "H3K27ac_retained",
                         "H3K27ac_effie_lost" = "H3K27ac_lost",
                         "ESRRB_dsg_retained" = "ESRRB_retained",
                         "ESRRB_dsg_lost" = "ESRRB_lost",
                         "KLF4_effie_retained" = "KLF4_retained",
                         "KLF4_effie_lost" = "KLF4_lost",
                         "OCT4_effie_retained" = "OCT4_retained",
                         "OCT4_effie_lost" = "OCT4_lost",
                         "SOX2_effie_retained" = "SOX2_retained",
                         "SOX2_effie_lost" = "SOX2_lost"))

result.tb$interest <- factor(result.tb$interest, levels = c("UP/NO", "DOWN"))
result.tb$target <- factor(result.tb$target, levels = rev(c("H3K27ac_retained", "H3K27ac_lost",
                                                        "ESRRB_retained", "ESRRB_lost",
                                                        "KLF4_retained", "KLF4_lost",
                                                        "OCT4_retained", "OCT4_lost",
                                                        "SOX2_retained","SOX2_lost")))

p <- ggplot(result.tb, aes(x = interest, y = target, size = -log10(pvalue), fill = oddsRatio)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
  scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"),  # Define gradient colors
                      values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5), 
                      #low = "white", high = "#CB333A",
                      #                  limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) +
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "heatmap_alltarget_bookmarking_reg_dotplot")
width <- panelSize(2.2)*mmToInch
height <- panelSize(2)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"),  height = height, width = width)
print(p)
dev.off()








```
###### Retained & Lost
```{r}
#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("ESRRB_NCB", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("ESRRB_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("NANOG_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp11 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp12 <- runFisherExactCombinationTarget("OCT4_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp13 <- runFisherExactCombinationTarget("SMC1", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp14 <- runFisherExactCombinationTarget("SOX2_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp15 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp16 <- runFisherExactCombinationTarget("SOX2_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp17 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10,
         temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 5), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5), 
                                    labels = c("0", "1", "2", "3", "4", "5")))

```
######  Retained
```{r}

#### Importing loops of target
temp1 <- runFisherExactCombinationTargetRetained("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTargetRetained("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTargetRetained("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTargetRetained("ESRRB_NCB", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTargetRetained("ESRRB_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTargetRetained("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTargetRetained("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTargetRetained("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTargetRetained("NANOG_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTargetRetained("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp11 <- runFisherExactCombinationTargetRetained("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp12 <- runFisherExactCombinationTargetRetained("OCT4_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp13 <- runFisherExactCombinationTargetRetained("SMC1", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp14 <- runFisherExactCombinationTargetRetained("SOX2_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp15 <- runFisherExactCombinationTargetRetained("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp16 <- runFisherExactCombinationTargetRetained("SOX2_pfa", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp17 <- runFisherExactCombinationTargetRetained("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10,
         temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))

```

#### Reg refined
###### Retained & Lost
```{r}


#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))



```
###### Retained
```{r}


#### Importing loops of target
temp1 <- runFisherExactCombinationTargetRetained("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTargetRetained("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTargetRetained("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTargetRetained("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTargetRetained("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTargetRetained("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTargetRetained("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTargetRetained("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTargetRetained("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTargetRetained("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))



```
## [2.24.2] Checking bookmarking - A485
#### All loops
```{r}
#### Importing loops of interest
interestName <- "allLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))
```
##### All loops refined
```{r}


#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))

```
#### Reg loops
```{r}
#### Importing loops of interest
interestName <- "regLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))

fwrite(as_tibble(loop.up.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
fwrite(as_tibble(loop.no.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
fwrite(as_tibble(loop.down.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
```
##### Retained & Lost
```{r}


#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
                                     loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
                                      loop.up.gr, loop.no.gr, loop.down.gr,
                                      background.gr)




data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
         temp6, temp7, temp8, temp9, temp10)

# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 3), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3), 
                                    labels = c("0", "1", "2", "3")))



```
### DEGs from RNA-seq
2.5kb from TSS, at least 1 bp overlap
#### 2i
```{r}
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))


temp <- diff.RNA %>% dplyr::mutate(start = TSS - 2500,
                           end = TSS + 2500) %>%
  dplyr::select(chr, start, end, ensembl_gene_id, diff)


down.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN"))
up.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "UP"))
no.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "NO"))
all.gr <- makeGRangesFromDataFrame(temp)

data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
                                      up.gr, no.gr, down.gr,
                                      all.gr)


# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 2), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2), 
                                    labels = c("0", "1", "2")))


## GO TEST
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "DOWN"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)

downStemGene <- unique(unlist((as.data.frame(GO) %>% dplyr::slice(c(4, 5, 14, 66)) %>% dplyr::mutate(geneID = strsplit(geneID, "/")))$geneID))
downStem.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(ensembl_gene_id %in% downStemGene))

print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))


downNONStem.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN",
                                                               !(ensembl_gene_id %in% downStemGene)))

print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))



### Checking bookmarking only in stem gene

data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
                                      down.gr, downNONStem.gr, downStem.gr,
                                      all.gr)


# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 2), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2), 
                                    labels = c("0", "1", "2")))
```
#### Epi
```{r}
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.Epi.A485_vs_G1.Epi.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))


temp <- diff.RNA %>% dplyr::mutate(start = TSS - 2500,
                           end = TSS + 2500) %>%
  dplyr::select(chr, start, end, ensembl_gene_id, diff)


down.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN"))
up.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "UP"))
no.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "NO"))
all.gr <- makeGRangesFromDataFrame(temp)

data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
                                      up.gr, no.gr, down.gr,
                                      all.gr)


# Visualization
library(circlize)

heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
  pivot_wider(names_from = interest, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 2), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2), 
                                    labels = c("0", "1", "2")))


## GO
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.Epi.A485_vs_G1.Epi.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))

geneList <- (diff.RNA %>% dplyr::filter(diff == "DOWN"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") + 
        scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))

```


## [2.25] Checking genes related to regulatory loops
### dTAG, Comparing to Async
#### P-P, P-E
```{r}
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
                                      diffCutoff = 0.2)

id.G1SpecificPert <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_G1SpecificPert.bedpe"))$V7
id.AsyncSpecificPert <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_AsyncSpecificPert.bedpe"))$V7
id.bothPert <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothPert.bedpe"))$V7
id.bothRetained <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothRetained.bedpe"))$V7


geneList.G1SpecificPert <- (geneAnnoData %>% dplyr::filter(id %in% id.G1SpecificPert) %>% unnest(gene))$gene
geneList.AsyncSpecificPert <- (geneAnnoData %>% dplyr::filter(id %in% id.AsyncSpecificPert) %>% unnest(gene))$gene
geneList.bothPert <- (geneAnnoData %>% dplyr::filter(id %in% id.bothPert) %>% unnest(gene))$gene
geneList.bothRetained <- (geneAnnoData %>% dplyr::filter(id %in% id.bothRetained) %>% unnest(gene))$gene

GO.G1SpecificPert.df <- as.data.frame(enrichGO(gene = geneList.G1SpecificPert, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO.AsyncSpecificPer.df <- as.data.frame(enrichGO(gene = geneList.AsyncSpecificPert, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO.bothPert.df <- as.data.frame(enrichGO(gene = geneList.bothPert, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO.bothRetained.df <- as.data.frame(enrichGO(gene = geneList.bothRetained, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))

subset.G1SpecificPert <- GO.G1SpecificPert.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "G1SpecificPert") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))

subset.AsyncSpecificPert <- GO.AsyncSpecificPer.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "AsyncSpecificPert") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))

subset.bothPert <- GO.bothPert.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "bothPert") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))

subset.bothRetained <- GO.bothRetained.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "bothRetained") %>%
  dplyr::mutate(
    gr = sapply(GeneRatio, function(x) {
      # Split the string by "/"
      parts <- unlist(strsplit(x, "/"))
      # Convert to numeric and perform the division
      as.numeric(parts[1]) / as.numeric(parts[2])
    })
  ) %>% dplyr::arrange(desc(gr))

data <- bind_rows(bind_rows(subset.G1SpecificPert, subset.bothPert), subset.bothRetained)

p <- ggplot(subset.G1SpecificPert, aes(x = group, y = Description, color = p.adjust, size = gr)) + 
  geom_point() + theme_bw() +
  scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
  scale_size_continuous(range = c(0, 3)) +
  labs(x = NULL, y = NULL) +
  theme(axis.text = element_text(size = 6),  # Set axis text size
        axis.title = element_text(size = 6), # Set axis title size (if not removed)
        legend.text = element_text(size = 6), # Set legend text size
        legend.title = element_text(size = 6)) 

```

### dTAG
#### P-P, P-E
```{r}
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
                                      diffCutoff = 0.2)

geneList.upLoop <- unique((geneAnnoData %>% dplyr::filter(updown_dTAG_DMSO == "UP") %>% unnest(gene))$gene)
geneList.noLoop <- unique((geneAnnoData %>% dplyr::filter(updown_dTAG_DMSO == "NO") %>% unnest(gene))$gene)
geneList.downLoop <- unique((geneAnnoData %>% dplyr::filter(updown_dTAG_DMSO == "DOWN") %>% unnest(gene))$gene)



# CHECKING HOW MANY GENES OVERLAP AMONG UP/NO/DOWN
sets_list <- list(up = geneList.upLoop,
                  no = geneList.noLoop,
                  down = geneList.downLoop)

# Create the Euler plot
euler_fit <- euler(sets_list)
plot(euler_fit,
     labels = TRUE,  # Display set labels
     fills = TRUE,   # Color fill the regions
     quantities = TRUE)


# GO
GO.up <- enrichGO(gene = geneList.upLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.up)
GO.no <- enrichGO(gene = geneList.noLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.no)
GO.down <- enrichGO(gene = geneList.downLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.down)

# GO to only specific subset
geneList.upLoop.specific <- setdiff(geneList.upLoop, union(geneList.noLoop, geneList.downLoop))
geneList.noLoop.specific <- setdiff(geneList.noLoop, union(geneList.upLoop, geneList.downLoop))
geneList.downLoop.specific <- setdiff(geneList.downLoop, union(geneList.noLoop, geneList.upLoop))
GO.up <- enrichGO(gene = geneList.upLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.up)
GO.no <- enrichGO(gene = geneList.noLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.no)
GO.down <- enrichGO(gene = geneList.downLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.down)


## Checking percentage of bobbie early gene

## Importing Bobbie gene classification
geneCluster <- fread(here(refDir, "bobbie_gene_classification.csv")) %>%
  dplyr::select(gene, enst, Cluster)
colnames(geneCluster) <- c("gene", "ensembl_transcript_id", "cluster")


## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
                 filters = "ensembl_transcript_id",
                 values = geneCluster$ensembl_transcript,
                 mart = ensembl.v102)
geneCluster <- geneCluster %>% dplyr::left_join(idPair_tg, by = c("ensembl_transcript_id"))

# Making data for stacked barplot
countGene <- function(geneCluster, groupName, clusterName){
  num <- nrow(geneCluster %>% dplyr::filter(ensembl_gene_id %in% groupName,
                                            cluster %in% clusterName))
  return(num)
}
countGeneList <- function(geneCluster, groupName){
  n1 <- countGene(geneCluster, groupName, "Early")
  n2 <- countGene(geneCluster, groupName, "Middle")
  n3 <-countGene(geneCluster, groupName, "Late")
  n4 <- countGene(geneCluster, groupName, "Transient")
  return(c(n1, n2, n3, n4))
}

group <- c(rep("up", 4), rep("no", 4), rep("down", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 3)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, geneList.upLoop),
           countGeneList(geneCluster, geneList.noLoop),
           countGeneList(geneCluster, geneList.downLoop))

data <- data.frame(group, cluster, value)
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="stack", stat = "identity") + theme_classic()
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat = "identity") + theme_classic()

value <- c(countGeneList(geneCluster, geneList.upLoop.specific),
           countGeneList(geneCluster, geneList.noLoop.specific),
           countGeneList(geneCluster, geneList.downLoop.specific))

data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="stack", stat = "identity") + theme_classic()
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat = "identity") + theme_classic()

```


#### P-N
```{r}
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2)

geneList.upLoop <- unique((geneAnnoData %>% dplyr::filter(updown_dTAG_DMSO == "UP") %>% unnest(gene))$gene)
geneList.noLoop <- unique((geneAnnoData %>% dplyr::filter(updown_dTAG_DMSO == "NO") %>% unnest(gene))$gene)
geneList.downLoop <- unique((geneAnnoData %>% dplyr::filter(updown_dTAG_DMSO == "DOWN") %>% unnest(gene))$gene)



# CHECKING HOW MANY GENES OVERLAP AMONG UP/NO/DOWN
sets_list <- list(up = geneList.upLoop,
                  no = geneList.noLoop,
                  down = geneList.downLoop)

# Create the Euler plot
euler_fit <- euler(sets_list)
plot(euler_fit,
     labels = TRUE,  # Display set labels
     fills = TRUE,   # Color fill the regions
     quantities = TRUE)


# GO
GO.up <- enrichGO(gene = geneList.upLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.up)
GO.no <- enrichGO(gene = geneList.noLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.no)
GO.down <- enrichGO(gene = geneList.downLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.down)

# GO to only specific subset
geneList.upLoop.specific <- setdiff(geneList.upLoop, union(geneList.noLoop, geneList.downLoop))
geneList.noLoop.specific <- setdiff(geneList.noLoop, union(geneList.upLoop, geneList.downLoop))
geneList.downLoop.specific <- setdiff(geneList.downLoop, union(geneList.noLoop, geneList.upLoop))
GO.up <- enrichGO(gene = geneList.upLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.up)
GO.no <- enrichGO(gene = geneList.noLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.no)
GO.down <- enrichGO(gene = geneList.downLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.down)


## Checking percentage of bobbie early gene

## Importing Bobbie gene classification
geneCluster <- fread(here(refDir, "bobbie_gene_classification.csv")) %>%
  dplyr::select(gene, enst, Cluster)
colnames(geneCluster) <- c("gene", "ensembl_transcript_id", "cluster")


## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
                 filters = "ensembl_transcript_id",
                 values = geneCluster$ensembl_transcript,
                 mart = ensembl.v102)
geneCluster <- geneCluster %>% dplyr::left_join(idPair_tg, by = c("ensembl_transcript_id"))

# Making data for stacked barplot
countGene <- function(geneCluster, groupName, clusterName){
  num <- nrow(geneCluster %>% dplyr::filter(ensembl_gene_id %in% groupName,
                                            cluster %in% clusterName))
  return(num)
}
countGeneList <- function(geneCluster, groupName){
  n1 <- countGene(geneCluster, groupName, "Early")
  n2 <- countGene(geneCluster, groupName, "Middle")
  n3 <-countGene(geneCluster, groupName, "Late")
  n4 <- countGene(geneCluster, groupName, "Transient")
  return(c(n1, n2, n3, n4))
}

group <- c(rep("up", 4), rep("no", 4), rep("down", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 3)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, geneList.upLoop),
           countGeneList(geneCluster, geneList.noLoop),
           countGeneList(geneCluster, geneList.downLoop))

data <- data.frame(group, cluster, value)
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="stack", stat = "identity") + theme_classic()
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat = "identity") + theme_classic()

value <- c(countGeneList(geneCluster, geneList.upLoop.specific),
           countGeneList(geneCluster, geneList.noLoop.specific),
           countGeneList(geneCluster, geneList.downLoop.specific))

data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="stack", stat = "identity") + theme_classic()
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat = "identity") + theme_classic()



################## Checking how those genes behave in A485
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.out <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                            padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                            TRUE ~ "NO"),
                           geneGroup = case_when(ensembl_gene_id %in% geneList.upLoop ~ "cohesin-upLoop",
                                                 ensembl_gene_id %in% geneList.noLoop ~ "cohesin-noLoop",
                                                 ensembl_gene_id %in% geneList.downLoop ~ "cohesin-downLoop",
                                                 TRUE ~ "NA"))


ggplot(diff.RNA.out, aes(x = geneGroup, fill = diff)) +
  geom_bar(position="fill") + theme_bw()


upLoop.downPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-upLoop", diff == "DOWN"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-upLoop"))
upLoop.upPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-upLoop", diff == "UP"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-upLoop"))

noLoop.downPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-noLoop", diff == "DOWN"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-noLoop"))
noLoop.upPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-noLoop", diff == "UP"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-noLoop"))

downLoop.downPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-downLoop", diff == "DOWN"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-downLoop"))
downLoop.upPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-downLoop", diff == "UP"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-downLoop"))


data <- tibble(geneGroup = c("cohesin-upLoop", "cohesin-upLoop", 
                     "cohesin-noLoop", "cohesin-noLoop", 
                     "cohesin-downLoop", "cohesin-downLoop"),
       diff = rep(c("DOWN", "UP"), 3),
       perc = c(upLoop.downPerc, upLoop.upPerc, noLoop.downPerc, noLoop.upPerc, downLoop.downPerc, downLoop.upPerc)*100)

ggplot(data, aes(x = geneGroup, y = perc, fill = diff )) +
  geom_bar(stat = "identity", position = "dodge") + theme_bw()




geneList.noLoopUnique <- geneList.noLoop[!geneList.noLoop %in% unique(c(geneList.downLoop, geneList.upLoop))]
geneList.downLoopUnique <- geneList.downLoop[!geneList.downLoop %in% unique(c(geneList.noLoop, geneList.upLoop))]
geneList.upLoopUnique <- geneList.upLoop[!geneList.upLoop %in% unique(c(geneList.noLoop, geneList.downLoop))]

diff.RNA.out <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                            padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                            TRUE ~ "NO"),
                           geneGroup = case_when(ensembl_gene_id %in% geneList.upLoopUnique  ~ "cohesin-upLoop",
                                                 ensembl_gene_id %in% geneList.noLoopUnique ~ "cohesin-noLoop",
                                                 ensembl_gene_id %in% geneList.downLoopUnique ~ "cohesin-downLoop",
                                                 TRUE ~ "NA"))


ggplot(diff.RNA.out, aes(x = geneGroup, fill = diff)) +
  geom_bar(position="fill") + theme_bw()


upLoop.downPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-upLoop", diff == "DOWN"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-upLoop"))
upLoop.upPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-upLoop", diff == "UP"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-upLoop"))

noLoop.downPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-noLoop", diff == "DOWN"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-noLoop"))
noLoop.upPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-noLoop", diff == "UP"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-noLoop"))

downLoop.downPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-downLoop", diff == "DOWN"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-downLoop"))
downLoop.upPerc <- nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-downLoop", diff == "UP"))/nrow(diff.RNA.out %>% dplyr::filter(geneGroup == "cohesin-downLoop"))


data <- tibble(geneGroup = c("cohesin-upLoop", "cohesin-upLoop", 
                     "cohesin-noLoop", "cohesin-noLoop", 
                     "cohesin-downLoop", "cohesin-downLoop"),
       diff = rep(c("DOWN", "UP"), 3),
       perc = c(upLoop.downPerc, upLoop.upPerc, noLoop.downPerc, noLoop.upPerc, downLoop.downPerc, downLoop.upPerc)*100)

ggplot(data, aes(x = geneGroup, y = perc, fill = diff )) +
  geom_bar(stat = "identity", position = "dodge") + theme_bw()

```
### A485
#### P-P, P-E
```{r}
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
                                      diffCutoff = 0.2)

geneList.upLoop <- unique((geneAnnoData %>% dplyr::filter(updown_A485_DMSO == "UP") %>% unnest(gene))$gene)
geneList.noLoop <- unique((geneAnnoData %>% dplyr::filter(updown_A485_DMSO == "NO") %>% unnest(gene))$gene)
geneList.downLoop <- unique((geneAnnoData %>% dplyr::filter(updown_A485_DMSO == "DOWN") %>% unnest(gene))$gene)



# CHECKING HOW MANY GENES OVERLAP AMONG UP/NO/DOWN
sets_list <- list(up = geneList.upLoop,
                  no = geneList.noLoop,
                  down = geneList.downLoop)

# Create the Euler plot
euler_fit <- euler(sets_list)
plot(euler_fit,
     labels = TRUE,  # Display set labels
     fills = TRUE,   # Color fill the regions
     quantities = TRUE)


# GO
GO.up <- enrichGO(gene = geneList.upLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.up, showCategory = 15)
GO.no <- enrichGO(gene = geneList.noLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.no, showCategory = 15)
GO.down <- enrichGO(gene = geneList.downLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.down, showCategory = 20)

# GO to only specific subset
geneList.upLoop.specific <- setdiff(geneList.upLoop, union(geneList.noLoop, geneList.downLoop))
geneList.noLoop.specific <- setdiff(geneList.noLoop, union(geneList.upLoop, geneList.downLoop))
geneList.downLoop.specific <- setdiff(geneList.downLoop, union(geneList.noLoop, geneList.upLoop))
GO.up <- enrichGO(gene = geneList.upLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.up)
GO.no <- enrichGO(gene = geneList.noLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.no)
GO.down <- enrichGO(gene = geneList.downLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.down, showCategory = 20)


```

#### P-N
```{r}
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2)

geneList.upLoop <- unique((geneAnnoData %>% dplyr::filter(updown_A485_DMSO == "UP") %>% unnest(gene))$gene)
geneList.noLoop <- unique((geneAnnoData %>% dplyr::filter(updown_A485_DMSO == "NO") %>% unnest(gene))$gene)
geneList.downLoop <- unique((geneAnnoData %>% dplyr::filter(updown_A485_DMSO == "DOWN") %>% unnest(gene))$gene)



# CHECKING HOW MANY GENES OVERLAP AMONG UP/NO/DOWN
sets_list <- list(up = geneList.upLoop,
                  no = geneList.noLoop,
                  down = geneList.downLoop)

# Create the Euler plot
euler_fit <- euler(sets_list)
plot(euler_fit,
     labels = TRUE,  # Display set labels
     fills = TRUE,   # Color fill the regions
     quantities = TRUE)


# GO
GO.up <- enrichGO(gene = geneList.upLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.up)
GO.no <- enrichGO(gene = geneList.noLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.no)
GO.down <- enrichGO(gene = geneList.downLoop, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.down, showCategory = 20)

# GO to only specific subset
geneList.upLoop.specific <- setdiff(geneList.upLoop, union(geneList.noLoop, geneList.downLoop))
geneList.noLoop.specific <- setdiff(geneList.noLoop, union(geneList.upLoop, geneList.downLoop))
geneList.downLoop.specific <- setdiff(geneList.downLoop, union(geneList.noLoop, geneList.upLoop))
GO.up <- enrichGO(gene = geneList.upLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.up)
GO.no <- enrichGO(gene = geneList.noLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.no)
GO.down <- enrichGO(gene = geneList.downLoop.specific, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO.down)


```
## [2.26] Checking the ChIP intensity at anchors
This doesn't have to be restricted to P-N loops. Let's check all loops first.
Also instead of density (mean), median could be better choice. Sum is not appropriate here since
the size of anchors are not same.
#### Limited to peaks to reduce noise
##### dTAG
```{r}
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(loop ==group1) )$sumScore
  distance2 <- (data %>% dplyr::filter(loop ==group2) )$sumScore
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

name <- "chromo_cons_annoHierarchy"

# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# UP NO
loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))

getSumScores <- function(track, anchor) {
  # Find overlaps between all anchors and track regions at once
  overlaps <- findOverlaps(anchor, track)
  
  # Extract the scores and corresponding anchor indices
  anchor_indices <- queryHits(overlaps)
  track_scores <- score(track)[subjectHits(overlaps)]
  
  # Use tapply to calculate the median scores for each anchor
  median_scores <- tapply(track_scores, anchor_indices, mean, na.rm = TRUE)
  
  # Initialize a numeric vector to store the median scores for each anchor
  all_median_scores <- rep(NA, length(anchor))
  
  # Populate the median scores for the anchors that have overlaps
  all_median_scores[as.numeric(names(median_scores))] <- median_scores
  
  return(all_median_scores)
}

plotSumScores <- function(track, peak, name){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  a <- getSumScores(peakTrack, anchor.up)
  b <- getSumScores(peakTrack, anchor.no)
  c <- getSumScores(peakTrack, anchor.down)
  a.tb <- tibble(loop = "UP",
                 sumScore = a)
  b.tb <- tibble(loop = "NO",
                 sumScore = b)
  c.tb <- tibble(loop = "DOWN",
                 sumScore = c)
  data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
  ggplot(data, aes(x = loop, y = sumScore)) + 
    labs(x = NULL, y = paste0(name, " average peak score per anchor")) +
    geom_violin(aes(fill = group), color = "black",
                linewidth = lineThick * mmToLineUnit, lineend = "square",
                show.legend = FALSE) +
    geom_boxplot(width = 0.1, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.size = 1, outlier.stroke = NA) + theme_classic() +
    stat_summary(
      aes(group = loop), fun = mean,
      geom = "point", shape = 21, size = 1,
      fill = "red", color = "black"
    ) +
    coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.95))) + theme(
    axis.title = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )
}

plotSumScoresBinary <- function(track, peak, name, anchor.upno, anchor.down){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  b <- getSumScores(peakTrack, anchor.upno)
  c <- getSumScores(peakTrack, anchor.down)
  b.tb <- tibble(loop = "UP/NO",
                 sumScore = b)
  c.tb <- tibble(loop = "DOWN",
                 sumScore = c)
  data <- bind_rows(b.tb, c.tb) %>% drop_na()
  data$loop <- factor(data$loop, levels = c("UP/NO", "DOWN"))
  
  p12 <- getPvalWilcox(data, "UP/NO", "DOWN")
  p <-  ggplot(data, aes(x = name, fill = loop, y = sumScore)) + 
    labs(x = NULL, y = paste0("Average ChIP peak score at anchor)")) +
    introdataviz::geom_split_violin(linewidth = lineThick * mmToLineUnit, lineend = "square",
                                    alpha = .4) +
    geom_boxplot(width = 0.3, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + theme_classic() +
    stat_summary(
      aes(group = loop), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) + theme(
      axis.title = element_text(
        size = fontSizeM, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeM, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 0,
      ),
      axis.text.y = element_text(
        size = fontSizeS,
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)
    ) + 
    coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.9))) + 
    annotate(
     "text", x = 1, y = quantile(data$sumScore, 0.5),
     label = paste0("p12: ", convPvalue(p12)),
     color = "black", hjust = 0, size = 3
   ) +   guides(
    fill = guide_legend(
        keywidth = 0.2,  # Adjust the width of the legend keys
        keyheight = 0.2  # Adjust the height of the legend keys
    )
  )
  
   fileName <- paste0("ChIP_peak_avgPeakScore_", name)
  width <- panelSize(1.5)*mmToInch
  height <- panelSize(1.2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "H3K4me3")
plotSumScoresBinary(track, peak, "H3K4me3", anchor.upno, anchor.down)

track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
#plotSumScores(track, peak, "H3K27ac")
plotSumScoresBinary(track, peak, "H3K27ac", anchor.upno, anchor.down)

track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
#plotSumScores(track, peak, "H3K14ac")
plotSumScoresBinary(track, peak, "H3K14ac", anchor.upno, anchor.down)

track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "CTCF")
plotSumScoresBinary(track, peak, "CTCF", anchor.upno, anchor.down)

track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "RAD21")
plotSumScoresBinary(track, peak, "RAD21", anchor.upno, anchor.down)


```
##### A485
```{r}
name <- "chromo_cons_annoHierarchy"

# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))

getSumScores <- function(track, anchor) {
  # Find overlaps between all anchors and track regions at once
  overlaps <- findOverlaps(anchor, track)
  
  # Extract the scores and corresponding anchor indices
  anchor_indices <- queryHits(overlaps)
  track_scores <- score(track)[subjectHits(overlaps)]
  
  # Use tapply to calculate the median scores for each anchor
  median_scores <- tapply(track_scores, anchor_indices, sum, na.rm = TRUE)
  
  # Initialize a numeric vector to store the median scores for each anchor
  all_median_scores <- rep(NA, length(anchor))
  
  # Populate the median scores for the anchors that have overlaps
  all_median_scores[as.numeric(names(median_scores))] <- median_scores
  
  return(all_median_scores)
}

plotSumScores <- function(track, peak, name){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  a <- getSumScores(peakTrack, anchor.up)
  b <- getSumScores(peakTrack, anchor.no)
  c <- getSumScores(peakTrack, anchor.down)
  a.tb <- tibble(loop = "UP",
                 sumScore = a)
  b.tb <- tibble(loop = "NO",
                 sumScore = b)
  c.tb <- tibble(loop = "DOWN",
                 sumScore = c)
  data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
  ggplot(data, aes(x = loop, y = sumScore)) + geom_boxplot(outlier.shape = NA) + theme_classic() + ggtitle(paste0(name, " sum peak")) +
    coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))
}

# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
plotSumScores(track, peak, "H3K4me3")

track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
plotSumScores(track, peak, "H3K27ac")

track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
plotSumScores(track, peak, "H3K14ac")

track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
plotSumScores(track, peak, "CTCF")

track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak.temp <- fread(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed")) %>%
  dplyr::mutate(V2 = V2 - 1000,
                V3 = V3 + 1000)
colnames(peak.temp) <- c("chr", "start", "end")
peak <- makeGRangesFromDataFrame(peak.temp)
plotSumScores(track, peak, "YY1")


### For those with only summit
track <- import(here(refDir, "GSM5571895_ESC_YY1_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM5571895_ESC_YY1_summit.bed"))
plotSumScores(track, peak, "RAD21")

### FOR THOSE WITHOUT PEAKS
track <- import(here(refDir, "GSM2082708_ESC.H3K27me3.1_mm10Lifted.black.bw"), format = "BigWig")
a <- getSumScores(track, anchor.up)
b <- getSumScores(track, anchor.no)
c <- getSumScores(track, anchor.down)
a.tb <- tibble(loop = "UP",
               sumScore = a)
b.tb <- tibble(loop = "NO",
               sumScore = b)
c.tb <- tibble(loop = "DOWN",
               sumScore = c)
data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
ggplot(data, aes(x = loop, y = sumScore)) + geom_boxplot(outlier.shape = NA) + theme_classic() + ggtitle(paste0("H3K27me3 sum no peak")) +
  coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))
```
#### Peak density instead of intensity
```{r}
name <- "chromo_cons_annoHierarchy"

# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))


peak.H3K4me3 <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
peak.temp <- fread(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed")) %>%
  dplyr::mutate(V2 = V2 - 1000,
                V3 = V3 + 1000)
colnames(peak.temp) <- c("chr", "start", "end")
peak.RAD21 <- makeGRangesFromDataFrame(peak.temp)


calculatePeakDensity <- function(peak, note){
  
  overlap_counts <- countOverlaps(anchor.up, peak)
  anchor_widths <- (width(anchor.up)-1)/1000
  density <- overlap_counts / anchor_widths
  data1 <- tibble(type = "UP",
                  densityPerKb = density)
  
  overlap_counts <- countOverlaps(anchor.no, peak)
  anchor_widths <- (width(anchor.no)-1)/1000
  density <- overlap_counts / anchor_widths
  mcols(anchor.no)$density <- density
  data2 <- tibble(type = "NO",
                  densityPerKb = density)
  
  overlap_counts <- countOverlaps(anchor.down, peak)
  anchor_widths <- (width(anchor.down)-1)/1000
  density <- overlap_counts / anchor_widths
  mcols(anchor.down)$density <- density
  data3 <- tibble(type = "DOWN",
                  densityPerKb = density)
  
  data <- bind_rows(data1, data2, data3)
  
  ggplot(data, aes(x = type, y = densityPerKb))  + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1 ) + theme_bw() + ggtitle(note) +
    coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))
}

calculatePeakDensity(peak.H3K4me3, "H3K4me3")
calculatePeakDensity(peak.H3K27ac, "H3K27ac")
calculatePeakDensity(peak.CTCF, "CTCF")
calculatePeakDensity(peak.RAD21, "RAD21")

```

## [2.27] Overlap with SE?
#### Q1. How many SEs are covered with Micro-C loops?
```{r}
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
anchor <- extractAnchor(loop)

### Whyte
n <- length(unique(subjectHits(findOverlaps(anchor, peak.Whyte.SE))))
total <- length(peak.Whyte.SE)
counts <- c(n, total - n)
labels <- paste(c("covered", "NOT covered"), counts)
pie(counts, labels = labels,  main = paste0("Whyte SE coverage \n", total), col = c("grey", "white"))

fileName <- here(figDir, "pie_SE_Whyte")
width <- panelSize(4)*mmToInch
height <- panelSize(4)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# pie(counts, labels = labels,  main = paste0("Whyte SE coverage \n", total), col = c("grey", "white"),
#     cex = 1, cex.main = 1)
# dev.off()



### Dylan
n <- length(unique(subjectHits(findOverlaps(anchor, peak.Dylan.SE))))
total <- length(peak.Dylan.SE)
counts <- c(n, total - n)
labels <- paste(c("covered", "NOT covered"), counts)
pie(counts, labels = labels, main = paste0("Dylan SE coverage \n", total), col = c("grey", "white"))

fileName <- here(figDir, "pie_SE_Murphy")
width <- panelSize(4)*mmToInch
height <- panelSize(4)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# pie(counts, labels = labels, main = paste0("Murphy SE coverage \n", total), col = c("grey", "white"))
# 
# dev.off()

```
#### Q2. Fisher's exact test?
##### dTAG
```{r}
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))

getOverlapLoopNum <- function(loop, peak){
  anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
  anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
  a <- queryHits(findOverlaps(anchor1, peak))
  b <- queryHits(findOverlaps(anchor2, peak))
  return(length(unique(c(a, b))))
}

getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
  all.overlap <- getOverlapLoopNum(allLoop, peak)
  all.notOverlap <- nrow(allLoop) - all.overlap
  
  subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
  subset.notOverlap <- nrow(subsetLoop) - subset.overlap
  
  contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
                                all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
  colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
  rownames(contingency_table) <- c("All loops", "Subset loops")
  
  # Perform Fisher's Exact Test
  fisher_test_result <- fisher.test(contingency_table)
  return(fisher_test_result)
}



loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.upno <- bind_rows(loop.up, loop.no)
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))

### Dylan
# Seeding
temp <- getSEOverlapFisher(loop, loop.upno, peak.Dylan.SE)
result.tb <- tibble(loopType = "UP/NO",
                    target = "Dylan SE",
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)

# temp <- getSEOverlapFisher(loop, loop.up, peak.Dylan.SE)
# result.tb <- tibble(loopType = "UP",
#                     target = "Dylan SE",
#                     pvalue = temp$p.value,
#                     oddsRatio = temp$estimate)
# Add row
# temp <- getSEOverlapFisher(loop, loop.no, peak.Dylan.SE)
# result.tb <- result.tb %>% 
#   add_row(loopType = "NO",
#           target = "Dylan SE",
#           pvalue = temp$p.value,
#           oddsRatio = temp$estimate)

temp <- getSEOverlapFisher(loop, loop.down, peak.Dylan.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target = "Dylan SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

### Whyte
# Seeding
# temp <- getSEOverlapFisher(loop, loop.up, peak.Whyte.SE)
# result.tb <- result.tb %>% 
#   add_row(loopType = "UP",
#                     target = "Whyte SE",
#                     pvalue = temp$p.value,
#                     oddsRatio = temp$estimate)

# Add row
temp <- getSEOverlapFisher(loop, loop.upno, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "UP/NO",
          target = "Whyte SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

temp <- getSEOverlapFisher(loop, loop.down, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target = "Whyte SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)


# Visualization
library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
  pivot_wider(names_from = loopType, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 2), 
                      c("#4852A0", "white", "#CB333A"))

p <- Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = fontSizeS, fontfamily = fontType))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        row_names_gp = gpar(fontsize = fontSizeS, fontfamily = fontType),
  column_names_gp = gpar(fontsize = fontSizeS, fontfamily = fontType),
        heatmap_legend_param = list(
          at = c(0, 1, 2),
          labels = c("0", "1", "2"),
          title_gp = gpar(fontfamily = fontType, fontsize = fontSizeS),
          labels_gp = gpar(fontfamily = fontType, fontsize = fontSizeS)
        )
)

# fileName <- here(figDir, "heatmap_SE_enrichment")
# width <- panelSize(1.5)*mmToInch
# height <- panelSize(0.7)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# print(p)
# dev.off()
# png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
# print(p)
# dev.off()

#######################

data$loopType <- factor(data$loopType, levels = c("UP/NO", "DOWN"))
data$target <- factor(data$target, levels = c("Whyte SE", "Dylan SE"))

p <- ggplot(data, aes(x = loopType, y = target, size = -log10(pvalue), fill = oddsRatio)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
  scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"),  # Define gradient colors
                      values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5), 
                      #low = "white", high = "#CB333A",
                      #                  limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) +
  labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- here(figDir, "heatmap_SE_enrichment_dotplot")
width <- panelSize(1.8)*mmToInch
height <- panelSize(1.1)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"),  height = height, width = width)
print(p)
dev.off()

```
##### A485
```{r}
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))

getOverlapLoopNum <- function(loop, peak){
  anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
  anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
  a <- queryHits(findOverlaps(anchor1, peak))
  b <- queryHits(findOverlaps(anchor2, peak))
  return(length(unique(c(a, b))))
}

getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
  all.overlap <- getOverlapLoopNum(allLoop, peak)
  all.notOverlap <- nrow(allLoop) - all.overlap
  
  subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
  subset.notOverlap <- nrow(subsetLoop) - subset.overlap
  
  contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
                                all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
  colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
  rownames(contingency_table) <- c("All loops", "Subset loops")
  
  # Perform Fisher's Exact Test
  fisher_test_result <- fisher.test(contingency_table)
  return(fisher_test_result)
}



loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))

### Dylan
# Seeding
temp <- getSEOverlapFisher(loop, loop.up, peak.Dylan.SE)
result.tb <- tibble(loopType = "UP",
                    target = "Dylan SE",
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)
# Add row
temp <- getSEOverlapFisher(loop, loop.no, peak.Dylan.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "NO",
          target = "Dylan SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

temp <- getSEOverlapFisher(loop, loop.down, peak.Dylan.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target = "Dylan SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

### Whyte
# Seeding
temp <- getSEOverlapFisher(loop, loop.up, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "UP",
                    target = "Whyte SE",
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)
# Add row
temp <- getSEOverlapFisher(loop, loop.no, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "NO",
          target = "Whyte SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)

temp <- getSEOverlapFisher(loop, loop.down, peak.Whyte.SE)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target = "Whyte SE",
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)


# Visualization
library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
  pivot_wider(names_from = loopType, values_from = pvalue) %>%
  column_to_rownames(var = "target")

col_fun <- colorRamp2(c(0, 1, 5), 
                      c("blue", "white", "red"))

Heatmap(as.matrix(heatmap_data),
        name = "Odds Ratio",
        col = col_fun,
        # Add annotation for p-values
        cell_fun = function(j, i, x, y, width, height, fill) {
          pval <- pvalue_data[i, j]
          label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
          grid.text(label, x, y, gp = gpar(fontsize = 10))
        },
        # Customize the heatmap layout
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        column_title = "Interest",
        row_title = "Target",
        heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5), 
                                    labels = c("0", "1", "2", "3", "4", "5")))



```


## [2.28] Comparing differential loops in dTAG to A485
The question I want to ask here is whether perturbed loops in dTAG experiments are either perturbed or not perturbed in A485 experiment. If there is compensation going on between RAD21 and A485, UP loop in RAD21 should be more DOWN in A485 and vice versa
#### Exploratory part
```{r}
diffCutoff <- 0.2
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))

 temp <- data
  temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  
  p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() + 
    geom_hline(yintercept = diffCutoff, alpha = 1, color = "black") +
    geom_hline(yintercept = -diffCutoff, alpha = 1, color = "black") +
    geom_vline(xintercept = diffCutoff, alpha = 1, color = "black") +
    geom_vline(xintercept = -diffCutoff, alpha = 1, color = "black") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) + 
    theme_classic()
  
  fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_dTAG-", diffName)
  png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
  print(p1)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 3.5, height = 3)
  print(p1)
  dev.off()



```


#### Scatterplot & box plot
```{r}
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))

makeAcrossSampleScatterplotdTAG <- function(data, AnnoList, diff, name, loopName, diffName){
  temp <- data %>% dplyr::filter(Anno2 %in% AnnoList,
                                 updown_dTAG_DMSO %in% diff)
  temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
  temp <- temp %>% dplyr::arrange(density)

  p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() + 
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) + 
    theme_classic() + ggtitle(paste0(name, "_", loopName, "_dTAG-", diffName)) + theme(plot.title = element_text(size = 5))
  
  fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_dTAG-", diffName)
  png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
  print(p1)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 3.5, height = 3)
  print(p1)
  dev.off()
}

makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("UP", "NO", "DOWN"), name, "all", "all")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("UP"), name, "all", "UP")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("NO"), name, "all", "NO")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("DOWN"), name, "all", "DOWN")

makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("UP", "NO", "DOWN"), name, "pe-pe", "all")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("UP"), name, "pe-pe", "UP")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("NO"), name, "pe-pe", "NO")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("DOWN"), name, "pe-pe", "DOWN")

makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("UP", "NO", "DOWN"), name, "str", "all")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("UP"), name, "str", "UP")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("NO"), name, "str", "NO")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("DOWN"), name, "str", "DOWN")

makeAcrossSampleScatterplotA485 <- function(data, AnnoList, diff, name, loopName, diffName){
  temp <- data %>% dplyr::filter(Anno2 %in% AnnoList,
                                 updown_A485_DMSO %in% diff)
  temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
  temp <- temp %>% dplyr::arrange(density)

  p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() + 
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) + 
    theme_classic() + ggtitle(paste0(name, "_", loopName, "_A485G-", diffName)) + theme(plot.title = element_text(size = 5))
  
  fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_A485-", diffName)
  png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
  print(p1)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 3.5, height = 3)
  print(p1)
  dev.off()
}

makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("UP", "NO", "DOWN"), name, "all", "all")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("UP"), name, "all", "UP")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("NO"), name, "all", "NO")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("DOWN"), name, "all", "DOWN")

makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("UP", "NO", "DOWN"), name, "pe-pe", "all")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("UP"), name, "pe-pe", "UP")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("NO"), name, "pe-pe", "NO")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("DOWN"), name, "pe-pe", "DOWN")

makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("UP", "NO", "DOWN"), name, "str", "all")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("UP"), name, "str", "UP")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("NO"), name, "str", "NO")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("DOWN"), name, "str", "DOWN")

######## Barplot
temp <- data %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO)
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))

temp <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO) 
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))


temp <- data %>% dplyr::filter(Anno2 %in% c("S-S", "S-X")) %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO) 
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))

###
temp <- data %>% dplyr::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))

temp <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>% dplyr:::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))


temp <- data %>% dplyr::filter(Anno2 %in% c("S-S", "S-X")) %>% dplyr:::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
  geom_hline(yintercept = 0) +
  geom_hline(yintercept = 0.2, linetype = "dashed") +
  geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))

```

## [2.29] Compartment analysis
Q. What are the changes in compartment level upon A485 treatment?
#### Compartment change distribution
```{r}
### IMPORTING COMPARMTNET SCORES
compDir <- here("../..", "result", "compartment", "CscoreTools")

cscore.DMSO <- as_tibble(fread(here(compDir, "G1DMSO_Merged_10kb_cscore_final.bedgraph"), skip = 1)) %>%
  dplyr::mutate(V2 = V2 + 1)
colnames(cscore.DMSO) <- c("chr", "start", "end", "cscore_DMSO")

cscore.A485 <- as_tibble(fread(here(compDir, "G1A485_Merged_10kb_cscore_final.bedgraph"), skip = 1)) %>%
  dplyr::mutate(V2 = V2 + 1)
colnames(cscore.A485) <- c("chr", "start", "end", "cscore_A485")


cscore <- dplyr::full_join(cscore.DMSO, cscore.A485, by = c("chr", "start", "end"))

# FILTER ROWS WITH NA
cscore <- cscore %>% filter(!if_any(everything(), is.na))

# Annotating how the compartment changed
threshold <- 0.1
cscore <- cscore %>% rowwise() %>% dplyr::mutate(isAB_DMSO = ifelse(cscore_DMSO > 0, "A", "B"),
                                                 isAB_A485 = ifelse(cscore_A485 > 0, "A", "B"),
                                                 changeType = case_when(
                                                   abs(cscore_DMSO - cscore_A485) < threshold ~ "Unchanged",
                                                   isAB_DMSO == "A" & isAB_A485 == "B" ~ "AtoB",
                                                   isAB_DMSO == "B" & isAB_A485 == "A" ~ "BtoA",
                                                   isAB_DMSO == "A" & isAB_A485 == "A" & abs(cscore_DMSO) > abs(cscore_A485) ~ "A_weakening",
                                                   isAB_DMSO == "A" & isAB_A485 == "A" & abs(cscore_DMSO) <= abs(cscore_A485) ~ "A_strengthening",
                                                   isAB_DMSO == "B" & isAB_A485 == "B" & abs(cscore_DMSO) > abs(cscore_A485) ~ "B_weakening",
                                                   isAB_DMSO == "B" & isAB_A485 == "B" & abs(cscore_DMSO) <= abs(cscore_A485) ~ "B_strengthening"                                                 ))
cscore_summary <- tibble(comparison = rep("A485_vs_DMSO", 7),
                         changeType = c("AtoB", "BtoA", "A_weakening", "A_strengthening", "B_weakening", "B_strengthening", "Unchanged"),
                         value = c(sum(cscore$changeType == "AtoB"),
                                   sum(cscore$changeType == "BtoA"),
                                   sum(cscore$changeType == "A_weakening"),
                                   sum(cscore$changeType == "A_strengthening"),
                                   sum(cscore$changeType == "B_weakening"),
                                   sum(cscore$changeType == "B_strengthening"),
                                   sum(cscore$changeType == "Unchanged")))
cscore_summary$changeType <- factor(cscore_summary$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
                                                                          "B_weakening", "BtoA", "A_strengthening",
                                                                          "Unchanged"))
ggplot(cscore_summary, aes(x = comparison, y = value, fill = changeType)) + geom_bar(position = "stack", stat = "identity") +
  theme_bw() +
  scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))
  
```
#### Combining with RNA-seq
```{r}
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")

diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
  dplyr::filter(!is.na(TSS))

alpha <- 0.05
fcCutoff <- 0.5

diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
                                                        padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
                                                        TRUE ~ "NO"))


getCompChangeType <- function(chrom, TSS, cscore.tb){
  temp.tb <- cscore.tb %>% dplyr::filter(chr == chrom, start < TSS, end > TSS)
  out <- temp.tb$changeType
  if(length(out) < 1){
    return(NA)
  }else{
    return(out)
  }
}

cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
temp <-diff.RNA %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
  dplyr::select(chr, start, end)
diff.RNA.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(diff.RNA.gr, cscore.gr)

#cscore.selected <- cscore %>% dplyr::slice(subjectHits(overlap))
## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(diff.RNA[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


missed <- diff.RNA[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(TSS = TSS + 10000) %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
  dplyr::select(chr, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols(missed[queryHits(overlap)],
                    data.table(cscore)[subjectHits(overlap)])

missed2 <- missed[-queryHits(overlap)]
temp <- missed2 %>% dplyr::mutate(TSS = TSS - 10000) %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
  dplyr::select(chr, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch3 <- bind_cols(missed2[queryHits(overlap)],
                    data.table(cscore)[subjectHits(overlap)])

#missed3 <- missed2[-queryHits(overlap)]


diff.RNA <- bind_rows(batch1, batch2, batch3)


# VISUALIZE
diff.RNA$changeType <- factor(diff.RNA$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
                                                                          "B_weakening", "BtoA", "A_strengthening",
                                                                          "Unchanged"))
ggplot(diff.RNA, aes(x = diff, fill = changeType)) +
  geom_bar(position = "fill") +
  labs(title = "Stacked Bar Plot of Change Type by Diff",
       x = "Diff",
       y = "Count") +
  theme_bw() +  scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))



```
#### Combining with loop
```{r}
name <- "chromo_cons_annoHierarchy"

# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
anchor.up.tb <- as_tibble(anchor.up) %>%
  dplyr::mutate(center = (start + end)/2) %>%
  dplyr::mutate(start = center-1, end = center+1) %>%
  dplyr::select(seqnames, start, end)
anchor.up <- makeGRangesFromDataFrame(anchor.up.tb)
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
anchor.no.tb <- as_tibble(anchor.no) %>%
  dplyr::mutate(center = (start + end)/2) %>%
  dplyr::mutate(start = center-1, end = center+1) %>%
  dplyr::select(seqnames, start, end)
anchor.no <- makeGRangesFromDataFrame(anchor.no.tb)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
anchor.down.tb <- as_tibble(anchor.down) %>%
  dplyr::mutate(center = (start + end)/2) %>%
  dplyr::mutate(start = center-1, end = center+1) %>%
  dplyr::select(seqnames, start, end)
anchor.down <- makeGRangesFromDataFrame(anchor.down.tb)


########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.up, cscore.gr)

## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.up.tb)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


missed <- data.table(anchor.up.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
  dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


data.up <- bind_rows(batch1, batch2)

########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.no, cscore.gr)

## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.no.tb)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


missed <- data.table(anchor.no.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
  dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


data.no <- bind_rows(batch1, batch2)

########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.down, cscore.gr)

## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.down.tb)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


missed <- data.table(anchor.down.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
  dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
          data.table(cscore)[subjectHits(overlap)])


data.down <- bind_rows(batch1, batch2)

n.up <- nrow(data.up)
n.no <- nrow(data.no)
n.down <- nrow(data.down)

data <- tibble(loopDiff = c(rep("UP", n.up), rep("NO", n.no), rep("DOWN", n.down)),
              changeType = c(data.up$changeType, data.no$changeType, data.down$changeType))

# VISUALIZE
data$changeType <- factor(data$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
                                                                          "B_weakening", "BtoA", "A_strengthening",
                                                                          "Unchanged"))
ggplot(data, aes(x = loopDiff, fill = changeType)) +
  geom_bar(position = "fill") +
  labs(title = "Stacked Bar Plot of Change Type by Diff",
       x = "Diff",
       y = "Count") +
  theme_bw() +  scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))




```
## [2.30] ChromHMM
```{r}
resultDir <- here("../../result")

data <- fread(here(resultDir, "chromHMM", "A485_pe-pe_anchors", "overlap_enrich_100_state.txt"))
colnames(data) <- c("state", "genome", "pe-pe_A485_down", "pe-pe_A485_no", "pe-pe_A485_up")
data <- data %>% dplyr::select(-genome)

data_matrix <- data %>%
  column_to_rownames(var = "state") %>%
  as.matrix()

library(circlize)

col_fun <- colorRamp2(c(0, 1, 10), 
                      c("blue", "white", "red"))

Heatmap(
  data_matrix,
  name = "Value",
  show_row_names = TRUE,
  show_column_names = TRUE,
  cluster_columns = FALSE,
  cluster_rows = FALSE,
   col = col_fun
)

#######
data2 <- data %>% dplyr::select(c(1, 2, 4))
data2 <- data2 %>% dplyr::filter(`pe-pe_A485_down` > `pe-pe_A485_up`)
data_matrix <- data2 %>%
  column_to_rownames(var = "state") %>%
  as.matrix()

library(circlize)

col_fun <- colorRamp2(c(0, 1, 10), 
                      c("blue", "white", "red"))

Heatmap(
  data_matrix,
  name = "Value",
  show_row_names = TRUE,
  show_column_names = TRUE,
  cluster_columns = FALSE,
  cluster_rows = FALSE,
   col = col_fun
)


```

## [2.31] Making Visualization Easy
https://bioconductor.org/books/devel/OHCA/pages/visualization.html
```{r}
### Getting loop ID
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
  dplyr::mutate(res = V3 - V2,
                id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id

temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
  dplyr::mutate(res = V3 - V2,
                id = paste(V1, res, V2, V5, sep = "_"))
strID <- temp$id

############## REG
## Filtering loops to plot
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- data %>% dplyr::filter(id %in% regID, dTAG > 0.5) %>% dplyr::arrange(desc(dTAG))
temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)

# Visualization
hicDir <- "/Volumes/UKJIN_SSD/data_vault_2024summer_microC/hic"
windowSize <- 1*1e6

i = 1
# for(i in seq(1, 25)){
  ## Loading hic and plotting
  chr <- as_tibble(loops[i])$seqnames1
  center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
  start <- floor(center - 0.5*windowSize)
  end <- floor(center + 0.5*windowSize)
  
  cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
  cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))
  #cf.G1dTAG <- HicFile(path = here(hicDir, "G1.A485.Merged.hic"))

  res <- 10*1000
  zmax <- 2.5
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_regLoop_dTAG_", i, "_10kb_figureVer"))
  
  width <- panelSize(20)*mmToInch
  height <- panelSize(10)*mmToInch
  
  png(paste0(fileName, ".png"), res = 600, units = "in", width = width, height = height)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  svglite(paste0(fileName, ".svg"), width = width, height = height)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  
  res <- 25*1000
  zmax <- 3
  start <- floor(center - windowSize)
  end <- floor(center + windowSize)
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_regLoop_dTAG_", i, "_25kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  # svglite(paste0(fileName, ".svg"), width = 10, height = 5)
  # print(cowplot::plot_grid(p1, p2, align = "h"))
  # dev.off()
# }

############## Str
## Filtering loops to plot
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- data %>% dplyr::filter(id %in% strID, dTAG > 0.5) %>% dplyr::arrange(desc(dTAG))
temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)

# Visualization
hicDir <- "/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/data_vault_2024summer_microC/hic"
windowSize <- 2*1e6

for(i in seq(1, 25)){
  ## Loading hic and plotting
  chr <- as_tibble(loops[i])$seqnames1
  center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
  start <- floor(center - 0.5*windowSize)
  end <- floor(center + 0.5*windowSize)
  
  cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
  cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))

  res <- 10*1000
  zmax <- 2.5
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_strLoop_dTAG_", i, "_10kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  
  
  res <- 25*1000
  zmax <- 3
  start <- floor(center - windowSize)
  end <- floor(center + windowSize)
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_strLoop_dTAG_", i, "_25kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
}

############## dTAG called loops
## Filtering loops to plot
data1 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_25kb.tsv"))
data2 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_10kb.tsv"))
data3 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_5kb.tsv"))
data <- bind_rows(data1, data2, data3)

temp <- data %>% dplyr::arrange(desc(score))

temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)

# Visualization
hicDir <- "/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/data_vault_2024summer_microC/hic"
windowSize <- 2*1e6

for(i in seq(1, 25)){
  ## Loading hic and plotting
  chr <- as_tibble(loops[i])$seqnames1
  center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
  start <- floor(center - 0.5*windowSize)
  end <- floor(center + 0.5*windowSize)
  
  cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
  cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))

  res <- 10*1000
  zmax <- 2.5
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_dTAGcalledLoop_dTAG_", i, "_10kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  
  
  res <- 25*1000
  zmax <- 3
  start <- floor(center - windowSize)
  end <- floor(center + windowSize)
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
  
  fileName <- here(figDir, paste0("visuzliation_dTAGcalledLoop_dTAG_", i, "_25kb"))
  png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
}

chr = "chr13"
start = 96900000
end = 98100000
  res <- 10*1000
  zmax <- 2.5
  hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
  hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
  p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax))
  p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax))
    print(cowplot::plot_grid(p1, p2, align = "h"))

```

## [2.32] Heatmap of loops
Strategy: check overlap of the union loops to the loops called at each condition
#### Check sample specific called loops
```{r}
library(circlize)

name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
cons.loop <- importBedpe(temp)

# Checking DMSO
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 5, "kb.bedpe"))))


overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)

index <- sort(unique(c(queryHits(overlap.25kb),
         queryHits(overlap.10kb),
         queryHits(overlap.5kb))))
data$calledByDMSO <- 0
data$calledByDMSO[index] <- 1

# Checking dTAG
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 5, "kb.bedpe"))))


overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)

index <- sort(unique(c(queryHits(overlap.25kb),
         queryHits(overlap.10kb),
         queryHits(overlap.5kb))))
data$calledBydTAG <- 0
data$calledBydTAG[index] <- 1

# Checking A485
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 5, "kb.bedpe"))))


overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)

index <- sort(unique(c(queryHits(overlap.25kb),
         queryHits(overlap.10kb),
         queryHits(overlap.5kb))))
data$calledByA485 <- 0
data$calledByA485[index] <- 1


# Heatmap (checking whether certain peak is called by specific sample)
dataToPlot <- data %>% dplyr::select(id, calledByDMSO, calledBydTAG, calledByA485)
dataToPlot <- dataToPlot %>% dplyr::mutate(flag = 4*calledByDMSO + 2*calledBydTAG + calledByA485) %>%
  dplyr::arrange(desc(flag))

row_groups <- dataToPlot$flag
data_matrix <- dataToPlot %>% column_to_rownames(var = "id") %>% dplyr::select(-flag) %>% as.matrix()
col_fun <- colorRamp2(c(0, 1), 
                      c("white", "green"))
h1 <- Heatmap(data_matrix, name = "calledBy",
        cluster_columns = FALSE,
        cluster_rows = FALSE,
        show_row_dend = FALSE,
        border = TRUE,
        show_row_names = FALSE,
        col = col_fun,
        row_split = row_groups)


# Prepare data for the second heatmap
dataToPlot2 <- data %>% dplyr::select(id, DMSO, dTAG, A485) %>%
  arrange(match(id, dataToPlot$id))

data_matrix2 <- dataToPlot2 %>% column_to_rownames(var = "id") %>% as.matrix()


col_fun2 <- colorRamp2(c(-0.5, 0, 1), c("blue", "white", "red"))

h2 <- Heatmap(data_matrix2, name = "score",
              cluster_columns = FALSE,
              cluster_rows = TRUE,
              show_row_dend = FALSE,
              border = TRUE,
              show_row_names = FALSE,
              col = col_fun2,
              row_split = row_groups)


fileName <- here(figDir, paste0("heatmap_checkingSampleCalledLoops"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 3, height = 10)
print(h1 + h2)
dev.off()

```
#### Checking updown
##### All loops
```{r}
# Heatmap
library(circlize)

# Heatmap 2
set.seed(123)

diffCutoff <- 0.2
dataToPlot <- data %>% dplyr::mutate(isdiff_dTAG_DMSO = 
                                       case_when(diff_dTAG_DMSO >= diffCutoff ~ 1,
                                                 abs(diff_dTAG_DMSO) < diffCutoff ~ 0,
                                                 diff_dTAG_DMSO <= -diffCutoff ~ -1,
                                                 TRUE ~ NA),
                                     isdiff_A485_DMSO = 
                                       case_when(diff_A485_DMSO >= diffCutoff ~ 1,
                                                 abs(diff_A485_DMSO) < diffCutoff ~ 0,
                                                 diff_A485_DMSO <= -diffCutoff ~ -1,
                                                 TRUE ~ NA)) %>%
  dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
        cluster_columns = FALSE,
        cluster_rows = FALSE,
        row_km = 9,
        show_row_dend = FALSE,
        border = FALSE,
        show_row_names = FALSE,
        col = col_fun)

fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()

set.seed(123)

hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)

loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
loop.cluster5 <- temp[row_clusters[[5]]]
loop.cluster6 <- temp[row_clusters[[6]]]
loop.cluster7 <- temp[row_clusters[[7]]]
loop.cluster8 <- temp[row_clusters[[8]]]
loop.cluster9 <- temp[row_clusters[[9]]]

```

##### Reg loops
```{r}
# Heatmap
library(circlize)

# Heatmap 2
set.seed(123)

diffCutoff <- 0.2
dataToPlot <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
  dplyr::mutate(isdiff_dTAG_DMSO = 
                  case_when(diff_dTAG_DMSO >= diffCutoff ~ 1,
                            abs(diff_dTAG_DMSO) < diffCutoff ~ 0,
                            diff_dTAG_DMSO <= -diffCutoff ~ -1,
                            TRUE ~ NA),
                isdiff_A485_DMSO = 
                  case_when(diff_A485_DMSO >= diffCutoff ~ 1,
                            abs(diff_A485_DMSO) < diffCutoff ~ 0,
                            diff_A485_DMSO <= -diffCutoff ~ -1,
                            TRUE ~ NA)) %>%
  dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
              cluster_columns = FALSE,
              cluster_rows = FALSE,
        row_km = 8,
        show_row_dend = FALSE,
        border = FALSE,
        show_row_names = FALSE,
        col = col_fun)

fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_reg"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()

set.seed(123)

hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)

loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
loop.cluster5 <- temp[row_clusters[[5]]]
loop.cluster6 <- temp[row_clusters[[6]]]
loop.cluster7 <- temp[row_clusters[[7]]]
loop.cluster8 <- temp[row_clusters[[8]]]

```

```{r}
library(circlize)

set.seed(123)
dataToPlot <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
  dplyr::select(id, diff_dTAG_DMSO, diff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "white", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
              cluster_columns = FALSE,
              cluster_rows = FALSE,
        row_km = 4,
        show_row_dend = FALSE,
        border = TRUE,
        show_row_names = FALSE,
        col = col_fun)


fileName <- here(figDir, paste0("heatmap_diffScore_reg_k4"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 6)
print(h1)
dev.off()

hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)

loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
```
##### All loops _OE
```{r}
# Heatmap
library(circlize)

name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Heatmap 2
set.seed(123)

# Import obs/exp scores and merge to the dataset
minValue <- -4
diffCutoff <- 0.5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))


# Merge dataset
dataToPlot <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
  dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO,
                isdiff_dTAG_DMSO = case_when(log_obsexp_diff_dTAG_DMSO >= diffCutoff ~ 1,
                                             abs(log_obsexp_diff_dTAG_DMSO) < diffCutoff ~ 0,
                                             log_obsexp_diff_dTAG_DMSO <= diffCutoff ~ -1,
                                             TRUE ~ NA ),
                isdiff_A485_DMSO = case_when(log_obsexp_diff_A485_DMSO >= diffCutoff ~ 1,
                                             abs(log_obsexp_diff_A485_DMSO) < diffCutoff ~ 0,
                                             log_obsexp_diff_A485_DMSO <= diffCutoff ~ -1,
                                             TRUE ~ NA))%>%
  dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)



data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
        cluster_columns = FALSE,
        cluster_rows = FALSE,
        row_km = 9,
        show_row_dend = FALSE,
        border = FALSE,
        show_row_names = FALSE,
        col = col_fun)

fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_OE"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()
# 
# set.seed(123)
# 
# hm_drawn <- draw(h1)
# row_clusters <- row_order(hm_drawn)
# 
# loop.cluster1 <- temp[row_clusters[[1]]]
# loop.cluster2 <- temp[row_clusters[[2]]]
# loop.cluster3 <- temp[row_clusters[[3]]]
# loop.cluster4 <- temp[row_clusters[[4]]]
# loop.cluster5 <- temp[row_clusters[[5]]]
# loop.cluster6 <- temp[row_clusters[[6]]]
# loop.cluster7 <- temp[row_clusters[[7]]]
# loop.cluster8 <- temp[row_clusters[[8]]]
# loop.cluster9 <- temp[row_clusters[[9]]]

```
##### Reg loops _OE
```{r}
# Heatmap
library(circlize)

name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Heatmap 2
set.seed(123)

# Import obs/exp scores and merge to the dataset
minValue <- -4
diffCutoff <- 0.5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))


# Merge dataset
dataToPlot <- data %>% dplyr::left_join(obsexp, by = c("id")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
  dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO,
                isdiff_dTAG_DMSO = case_when(log_obsexp_diff_dTAG_DMSO >= diffCutoff ~ 1,
                                             abs(log_obsexp_diff_dTAG_DMSO) < diffCutoff ~ 0,
                                             log_obsexp_diff_dTAG_DMSO <= diffCutoff ~ -1,
                                             TRUE ~ NA ),
                isdiff_A485_DMSO = case_when(log_obsexp_diff_A485_DMSO >= diffCutoff ~ 1,
                                             abs(log_obsexp_diff_A485_DMSO) < diffCutoff ~ 0,
                                             log_obsexp_diff_A485_DMSO <= diffCutoff ~ -1,
                                             TRUE ~ NA))%>%
  dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)



data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
        cluster_columns = FALSE,
        cluster_rows = FALSE,
        row_km = 9,
        show_row_dend = FALSE,
        border = FALSE,
        show_row_names = FALSE,
        col = col_fun)

fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_reg_OE"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()
# 
# set.seed(123)
# 
# hm_drawn <- draw(h1)
# row_clusters <- row_order(hm_drawn)
# 
# loop.cluster1 <- temp[row_clusters[[1]]]
# loop.cluster2 <- temp[row_clusters[[2]]]
# loop.cluster3 <- temp[row_clusters[[3]]]
# loop.cluster4 <- temp[row_clusters[[4]]]
# loop.cluster5 <- temp[row_clusters[[5]]]
# loop.cluster6 <- temp[row_clusters[[6]]]
# loop.cluster7 <- temp[row_clusters[[7]]]
# loop.cluster8 <- temp[row_clusters[[8]]]
# loop.cluster9 <- temp[row_clusters[[9]]]

```
### [2.32] Comparing with Bobbie's data
Where does differentiated related genes fall into?
```{r}
marginError <- function(myList) {
  sample.n = length(myList)
  sample.sd = sd(myList)
  sample.se = sample.sd/sqrt(sample.n)
  alpha = 0.05
  degrees.freedom = sample.n - 1
  t.score = qt(p = alpha/2, df = degrees.freedom, lower.tail = F)
  margin.error = t.score*sample.se
  return(margin.error)
}

PROseq.bobbie <- as_tibble(fread(here(refDir, "bobbie_gene_classification.csv")))

geneList.Epi.dTAG.up <- (fread(here(refDir, "diff_G1.dTAG_G1.Epi.dTAG_vs_G1.Epi.DMSO.tsv")) %>% dplyr::filter(padj < 0.05, log2FoldChange > 0))$ensembl_gene_id

geneList.Epi.dTAG.down <- (fread(here(refDir, "diff_G1.dTAG_G1.Epi.dTAG_vs_G1.Epi.DMSO.tsv")) %>% dplyr::filter(padj < 0.05, log2FoldChange < 0))$ensembl_gene_id

## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
                 filters = "ensembl_transcript_id",
                 values = PROseq.bobbie$enst,
                 mart = ensembl.v102)
PROseq.bobbie <- PROseq.bobbie %>% dplyr::left_join(idPair_tg, by = c("enst" = "ensembl_transcript_id"))

## Bar Plot
temp <- PROseq.bobbie %>% dplyr::filter(ensembl_gene_id %in% geneList.Epi.dTAG.down)

group <- c(rep("EpiLC_dTAGvsDMSO_DOWN", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 1)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(sum(temp$Cluster == "Early"),
           sum(temp$Cluster == "Middle"),
           sum(temp$Cluster == "Late"),
           sum(temp$Cluster == "Transient"))
data <- data.frame(group, cluster, value)
ggplot(data, aes(fill=cluster, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()

## Line plot
temp <- temp %>% dplyr::select(c(4, 5, 6, 7, 8, 9))
temp.tall = temp %>% pivot_longer(-c(1, 2), names_to = "timepoints", values_to = "value")

gg1 = ggplot(temp.tall,
       aes( x = factor(timepoints, level = c("MIT", "EG1", "LG1", "ASYN")),
            y = value,
            group = enst, col = enst)) +
  geom_line() +
  theme_classic() +
  ggtitle("Bobbie, PRO-seq", subtitle = "selected genes") +
  xlab ("timepoints") + ylab("value") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "none",
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

ggsave(filename = here(figDir, "bobbie_gene_EpiLC_dTAGvsDMSO_DOWN.png"), gg1, width = 4, height = 4, dpi = 300, units = "in", device = "png")


# DRAW RIBBON


temp.ribbon = tibble("timepoints" = c("MIT", "EG1", "LG1", "ASYN"),
                      "value" = c(mean(temp$MIT), mean(temp$EG1), mean(temp$LG1), mean(temp$ASYN)),
                      "lower" = c(mean(temp$MIT) - marginError(temp$MIT), 
                                  mean(temp$EG1) - marginError(temp$EG1), 
                                  mean(temp$LG1) - marginError(temp$LG1), 
                                  mean(temp$ASYN) - marginError(temp$ASYN)),
                      "upper" = c(mean(temp$MIT) + marginError(temp$MIT), 
                                  mean(temp$EG1) + marginError(temp$EG1), 
                                  mean(temp$LG1) + marginError(temp$LG1), 
                                  mean(temp$ASYN) + marginError(temp$ASYN)))
gg1 = ggplot(temp.ribbon, aes(x = factor(timepoints, level = c("MIT", "EG1", "LG1", "ASYN")), 
                        y = value, group = 1)) +
  geom_line(color = "black") +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.3) +
  theme_classic() +
  ggtitle("Bobbie, PRO-seq", subtitle = "selected genes") +
  xlab ("timepoints") + ylab("value") + ylim(0, 65) +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "none",
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

ggsave(filename = here(figDir, "bobbie_gene_EpiLC_dTAGvsDMSO_DOWN_95CI.png"), gg1, width = 4, height = 4, dpi = 300, units = "in", device = "png")


```

###[2.33] Finding closest enhancer
Here, enhancer will be defined by H3K27ac peak
It makes more sence to include the enhancer peak on gene body
(strategy 1) exclude +-10kb region from TSS only
(strategy 2) only check the E-P loops called in Micro-C
##### binary group - excluding all gene body
```{r}
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS)

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.tb <- gene.tb %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
  ensembl = gene.tb$ensembl
)

genebody.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>% dplyr::select(V1, V2, V3)
colnames(genebody.tb) <- c("chr", "start", "end")
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)


peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K27ac<- makeGRangesFromDataFrame(fread(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed")) %>% 
                                          dplyr::mutate(chr = V1, start = (V2 + V3)/2, end = (V2 + V3)/2) %>%
                                          dplyr::select(chr, start, end))

####### Filtering out peaks overlapping with the gene body
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)

# Indices of peaks that overlap the TSS
overlapping_peak_indices <- unique(subjectHits(overlaps))

# Exclude overlapping peaks
non_overlapping_peaks <- peak.H3K27ac[-overlapping_peak_indices]


####### Calculating distance to nearest peak
nearest_peak_indices <- nearest(genes.gr, non_overlapping_peaks)
nearest_peaks <- non_overlapping_peaks[nearest_peak_indices]
distances <- distance(genes.gr, nearest_peaks)


results <- data.frame(
  ensembl = mcols(genes.gr)$ensembl,
  gene_chr = as.character(seqnames(genes.gr)),
  gene_TSS = start(genes.gr),
  peak_chr = as.character(seqnames(nearest_peaks)),
  peak_start = start(nearest_peaks),
  peak_end = end(nearest_peaks),
  distance = distances
)

results <- results %>% dplyr::mutate(group = case_when(ensembl %in% group1 ~ "group1",
                                            ensembl %in% group2 ~ "group2",
                                            TRUE ~ NA))

ggplot(results, aes(x = group, y = distance, fill = group)) + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw() + ggtitle("distance to nearest H3K27ac peak excluding gene body") + scale_y_continuous(labels = label_kb_mb) + coord_cartesian(ylim = c(0, 250*1000))

  
```
##### binary group - excluding specific gene body
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS) %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
  ensembl = gene.tb$ensembl
)

genebody.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>% dplyr::filter(V6 %in% c(group1, group2))%>% dplyr::select(V1, V2, V3)
colnames(genebody.tb) <- c("chr", "start", "end")
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)


# Step 1: Find overlaps between gene bodies and peaks
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)

# Create a list mapping each gene to the indices of peaks overlapping with its gene body
overlapping_peaks_per_gene <- split(subjectHits(overlaps), queryHits(overlaps))

# Initialize an empty list to store results
results_list <- vector("list", length(genes.gr))

# Step 2: For each gene, exclude overlapping peaks and find the nearest peak to its TSS
for (i in seq_along(genes.gr)) {
  gene <- genes.gr[i]
  
  # Get indices of peaks overlapping with this gene's body
  overlapping_peak_indices <- overlapping_peaks_per_gene[[as.character(i)]]
  
  # Exclude overlapping peaks for this gene
  if (!is.null(overlapping_peak_indices)) {
    peaks_to_consider <- peak.H3K27ac[-overlapping_peak_indices]
  } else {
    peaks_to_consider <- peak.H3K27ac
  }
  
  # Find the nearest peak to the TSS of this gene
  nearest_peak_index <- nearest(gene, peaks_to_consider)
  
  if (is.na(nearest_peak_index)) {
    # No peaks found; set NA values
    results_list[[i]] <- data.frame(
      ensembl = mcols(gene)$ensembl,
      gene_chr = as.character(seqnames(gene)),
      gene_TSS = start(gene),
      peak_chr = NA,
      peak_start = NA,
      peak_end = NA,
      distance = NA
    )
  } else {
    nearest_peak <- peaks_to_consider[nearest_peak_index]
    dist <- distance(gene, nearest_peak)
    
    results_list[[i]] <- data.frame(
      ensembl = mcols(gene)$ensembl,
      gene_chr = as.character(seqnames(gene)),
      gene_TSS = start(gene),
      peak_chr = as.character(seqnames(nearest_peak)),
      peak_start = start(nearest_peak),
      peak_end = end(nearest_peak),
      distance = dist
    )
  }
}

# Combine results into a single data frame
results <- do.call(rbind, results_list)

# Add group information
results <- results %>% dplyr::mutate(group = case_when(
  ensembl %in% group1 ~ "group1",
  ensembl %in% group2 ~ "group2",
  TRUE ~ NA_character_
))

# Plotting
ggplot(results, aes(x = group, y = distance, fill = group)) +
  geom_violin() +
  geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
  theme_bw() +
  ggtitle("Distance to Nearest H3K27ac Peak Excluding Gene Body Overlaps") +
  scale_y_continuous(labels = label_kb_mb) +
  coord_cartesian(ylim = c(0, 50 * 1000))
```

##### binary group - excluding all TSS +- 10kb
```{r}
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS)

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.tb <- gene.tb %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
  ensembl = gene.tb$ensembl
)

genebody.tb <- gene.tb %>% dplyr::mutate(start = TSS-10*1000, end = TSS + 10*1000) %>%
  dplyr::select(chr, start, end)

genebody.gr <- makeGRangesFromDataFrame(genebody.tb)


peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
# peak.H3K27ac<- makeGRangesFromDataFrame(fread(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed")) %>% 
#                                           dplyr::mutate(chr = V1, start = (V2 + V3)/2, end = (V2 + V3)/2) %>%
#                                           dplyr::select(chr, start, end))

####### Filtering out peaks overlapping with the gene body
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)

# Indices of peaks that overlap the TSS
overlapping_peak_indices <- unique(subjectHits(overlaps))

# Exclude overlapping peaks
non_overlapping_peaks <- peak.H3K27ac[-overlapping_peak_indices]


####### Calculating distance to nearest peak
nearest_peak_indices <- nearest(genes.gr, non_overlapping_peaks)
nearest_peaks <- non_overlapping_peaks[nearest_peak_indices]
distances <- distance(genes.gr, nearest_peaks)


results <- data.frame(
  ensembl = mcols(genes.gr)$ensembl,
  gene_chr = as.character(seqnames(genes.gr)),
  gene_TSS = start(genes.gr),
  peak_chr = as.character(seqnames(nearest_peaks)),
  peak_start = start(nearest_peaks),
  peak_end = end(nearest_peaks),
  distance = distances
)

results <- results %>% dplyr::mutate(group = case_when(ensembl %in% group1 ~ "group1",
                                            ensembl %in% group2 ~ "group2",
                                            TRUE ~ NA))

ggplot(results, aes(x = group, y = distance, fill = group)) + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw() + ggtitle("distance to nearest H3K27ac peak excluding gene body") + scale_y_continuous(labels = label_kb_mb) + coord_cartesian(ylim = c(0, 250*1000))

  
```

##### binary group - excluding specific TSS +- 10 kb
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS) %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
  ensembl = gene.tb$ensembl
)

genebody.tb <- gene.tb %>% dplyr::mutate(start = TSS-10*1000, end = TSS + 10*1000) %>%
  dplyr::select(chr, start, end)
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)

peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))


# Step 1: Find overlaps between gene bodies and peaks
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)

# Create a list mapping each gene to the indices of peaks overlapping with its gene body
overlapping_peaks_per_gene <- split(subjectHits(overlaps), queryHits(overlaps))

# Initialize an empty list to store results
results_list <- vector("list", length(genes.gr))

# Step 2: For each gene, exclude overlapping peaks and find the nearest peak to its TSS
for (i in seq_along(genes.gr)) {
  gene <- genes.gr[i]
  
  # Get indices of peaks overlapping with this gene's body
  overlapping_peak_indices <- overlapping_peaks_per_gene[[as.character(i)]]
  
  # Exclude overlapping peaks for this gene
  if (!is.null(overlapping_peak_indices)) {
    peaks_to_consider <- peak.H3K27ac[-overlapping_peak_indices]
  } else {
    peaks_to_consider <- peak.H3K27ac
  }
  
  # Find the nearest peak to the TSS of this gene
  nearest_peak_index <- nearest(gene, peaks_to_consider)
  
  if (is.na(nearest_peak_index)) {
    # No peaks found; set NA values
    results_list[[i]] <- data.frame(
      ensembl = mcols(gene)$ensembl,
      gene_chr = as.character(seqnames(gene)),
      gene_TSS = start(gene),
      peak_chr = NA,
      peak_start = NA,
      peak_end = NA,
      distance = NA
    )
  } else {
    nearest_peak <- peaks_to_consider[nearest_peak_index]
    dist <- distance(gene, nearest_peak)
    
    results_list[[i]] <- data.frame(
      ensembl = mcols(gene)$ensembl,
      gene_chr = as.character(seqnames(gene)),
      gene_TSS = start(gene),
      peak_chr = as.character(seqnames(nearest_peak)),
      peak_start = start(nearest_peak),
      peak_end = end(nearest_peak),
      distance = dist
    )
  }
}

# Combine results into a single data frame
results <- do.call(rbind, results_list)

# Add group information
results <- results %>% dplyr::mutate(group = case_when(
  ensembl %in% group1 ~ "group1",
  ensembl %in% group2 ~ "group2",
  TRUE ~ NA_character_
))

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group == group1) )$distance
  distance2 <- (data %>% dplyr::filter(group == group2) )$distance
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

getPvalWilcox(results, "group1", "group2")
# Plotting
ggplot(results, aes(x = group, y = distance, fill = group)) +
  geom_violin() +
  geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
  theme_bw() +
  ggtitle("Distance to Nearest H3K27ac Peak Excluding Gene Body Overlaps") +
  scale_y_continuous(labels = label_kb_mb) +
  coord_cartesian(ylim = c(0, 50 * 1000)) +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") 
```

##### Only limiting to E-P loops from Micro-C
```{r}

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene



name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
                                 diffCutoff = 0.2) %>%
  dplyr::filter(Anno2 == "P-E")

temp <- geneAnnoData %>% unnest(gene)

gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
  dplyr::select(ensembl, chr, TSS)

temp <- dplyr::left_join(temp, gene.tb, by = c("gene" = "ensembl"))

temp <- temp %>% dplyr::mutate(center1 = (start1 + end1)/2,
                               center2 = (start2 + end2)/2,
                               distance1 = abs(TSS-center1),
                               distance2 = abs(TSS-center2)) %>%
  dplyr::mutate(distance = pmax(distance1, distance2))

temp <- temp %>% group_by(gene) %>% summarize(min_enh_distance = min(distance)) %>%
  dplyr::mutate(group = case_when(gene %in% group1 ~ "group1",
                                  gene %in% group2 ~ "group2",
                                  TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(temp, aes(x = group, y = min_enh_distance, fill = group)) + 
  geom_violin() +
  geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
  theme_bw() +
  ggtitle("Distance to Nearest Enhancer fro E-P") +
  scale_y_continuous(labels = label_kb_mb)  +
  coord_cartesian(ylim = c(0, 1000 * 1000))

```



### [2.24] (WORKING ON)Checking the percentage of non-important loops
Idea: having one non-perturbed reg loops could be enough.
```{r}
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                 diffCutoff = 0.2)


loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))

geneAnnoData <- geneAnnoData %>% dplyr::filter(id %in% c(loop.up$id, loop.no$id, loop.down$id)) %>%
  dplyr::mutate(loopType = case_when(id %in% c(loop.up$id, loop.no$id) ~ "insensitive",
                                     id %in% c(loop.down$id) ~ "sensitive",
                                     TRUE ~ NA))

data <- geneAnnoData %>% dplyr::select(gene, id, loopType) %>% unnest(gene) %>%
  group_by(gene) %>% summarize(
    insensitive = sum(loopType == "insensitive", na.rm = TRUE),
    sensitive = sum(loopType == "sensitive", na.rm = TRUE))



###

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

geneGroup <- tibble(group = c(rep("group1", length(group1)), rep("group2", length(group2))),
                    gene = c(group1, group2))

data <- left_join(geneGroup, data, by = c("gene")) %>% dplyr::mutate(mutate(across(everything(), ~replace_na(., 0))))


###
data <- data %>% dplyr::mutate(presenceOfRetained = if_else(insensitive > 0, "YES", "NO"),
                       percOfRetained = if_else(insensitive + sensitive == 0, 0, 100*insensitive/(insensitive + sensitive)))


###
ggplot(data, aes(x = group, y = percOfRetained, fill = group)) + geom_violin() + 
  geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw()


### Stacked barplot
group <- c("group1", "group1", "group2", "group2")
presenceOfRetained <- rep(c("YES", "NO"), 2)
presenceOfRetained <- factor (presenceOfRetained, levels = c("YES", "NO"))
value <- c(nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "YES")),
           nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "NO")),
           nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "YES")),
           nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "NO")))
plotData <- data.frame(group, presenceOfRetained, value)

ggplot(plotData, aes(fill=presenceOfRetained, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()


```

```{r}
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
                                 diffCutoff = 0.2)


loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(size = V3 - V2,
                id = paste(V1, size, V2, V5, sep = "_"))

geneAnnoData <- geneAnnoData %>% dplyr::filter(id %in% c(loop.up$id, loop.no$id, loop.down$id)) %>%
  dplyr::mutate(loopType = case_when(id %in% c(loop.up$id, loop.no$id) ~ "insensitive",
                                     id %in% c(loop.down$id) ~ "sensitive",
                                     TRUE ~ NA))

data <- geneAnnoData %>% dplyr::select(gene, id, loopType) %>% unnest(gene) %>%
  group_by(gene) %>% summarize(
    insensitive = sum(loopType == "insensitive", na.rm = TRUE),
    sensitive = sum(loopType == "sensitive", na.rm = TRUE))



###

group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

geneGroup <- tibble(group = c(rep("group1", length(group1)), rep("group2", length(group2))),
                    gene = c(group1, group2))

data <- left_join(geneGroup, data, by = c("gene")) %>% dplyr::mutate(mutate(across(everything(), ~replace_na(., 0))))


###
data <- data %>% dplyr::mutate(presenceOfRetained = if_else(insensitive > 0, "YES", "NO"),
                       percOfRetained = if_else(insensitive + sensitive == 0, 0, 100*insensitive/(insensitive + sensitive)))


###
ggplot(data, aes(x = group, y = percOfRetained, fill = group)) + geom_violin() + 
  geom_boxplot(outlier.shape = NA, width = 0.05, fill = "white") + theme_bw()


### Stacked barplot
group <- c("group1", "group1", "group2", "group2")
presenceOfRetained <- rep(c("YES", "NO"), 2)
presenceOfRetained <- factor (presenceOfRetained, levels = c("YES", "NO"))
value <- c(nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "YES")),
           nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "NO")),
           nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "YES")),
           nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "NO")))
plotData <- data.frame(group, presenceOfRetained, value)

ggplot(plotData, aes(fill=presenceOfRetained, y=value, x=group)) + 
    geom_bar(position="fill", stat="identity") + theme_classic()


```

### [2.25] Obs/Exp loops


#### Merging obs/exp scores from different resolution & samples
##### G1
```{r}
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))

sample <- "G1.DMSO.Merged"
for(sample in c("G1.DMSO.Merged", "G1.dTAG.Merged", "G1.A485.Merged")){
  temp.25kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_25kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.25kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp.10kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_10kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.10kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp.5kb <-fread(here(consensusDir, paste0("loopScore_", sample, "_5kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.5kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp <- bind_rows(temp.25kb, temp.10kb, temp.5kb)
  fwrite(temp, here(consensusDir, paste0("loopScore_", sample, ".tsv")), sep = "\t")
}


temp.DMSO <- fread(here(consensusDir, "loopScore_G1.DMSO.Merged.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.DMSO) <- c("id", "obs_DMSO", "obsexp_DMSO")
temp.dTAG <- fread(here(consensusDir, "loopScore_G1.dTAG.Merged.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.dTAG) <- c("id", "obs_dTAG", "obsexp_dTAG")
temp.A485 <- fread(here(consensusDir, "loopScore_G1.A485.Merged.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.A485) <- c("id", "obs_A485", "obsexp_A485")

loops_oe <- full_join(full_join(temp.DMSO, temp.dTAG, by = "id"),
          temp.A485, by = "id") %>%
  dplyr::mutate(oeFC_dTAG_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_dTAG/obsexp_DMSO),
                oeFC_A485_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_A485/obsexp_DMSO))

fwrite(loops_oe, here(consensusDir, paste0("loopScore_cons_obsexp.tsv")), sep = "\t")
```

##### Async
```{r}
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))

for(sample in c("Async.UT", "Async.AID")){
  temp.25kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_25kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.25kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp.10kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_10kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.10kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp.5kb <-fread(here(consensusDir, paste0("loopScore_", sample, "_5kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
  colnames(temp.5kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
  
  temp <- bind_rows(temp.25kb, temp.10kb, temp.5kb)
  fwrite(temp, here(consensusDir, paste0("loopScore_", sample, ".tsv")), sep = "\t")
}


temp.UT <- fread(here(consensusDir, "loopScore_Async.UT.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.UT) <- c("id", "obs_UT", "obsexp_UT")
temp.AID <- fread(here(consensusDir, "loopScore_Async.AID.tsv")) %>%
  dplyr::select(id, obs, obsexp)
colnames(temp.AID) <- c("id", "obs_AID", "obsexp_AID")

loops_oe <- full_join(temp.UT, temp.AID, by = "id") %>%
  dplyr::mutate(oeFC_AID_UT = if_else(obsexp_UT == 0, NA, obsexp_AID/obsexp_UT))

fwrite(loops_oe, here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv")), sep = "\t")
```

#### Merging obs/exp per resolution
Only the loops that made to consensus loops are considered
##### G1
```{r}
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))

res <- 25

for (res in c(25, 10, 5)){
  
  temp.DMSO <- fread(here(consensusDir, paste0("loopScore_G1.DMSO.Merged_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.DMSO) <- c("id", "obs_DMSO", "obsexp_DMSO")
  
  temp.dTAG <- fread(here(consensusDir, paste0("loopScore_G1.dTAG.Merged_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.dTAG) <- c("id", "obs_dTAG", "obsexp_dTAG")
  
  temp.A485 <- fread(here(consensusDir, paste0("loopScore_G1.A485.Merged_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.A485) <- c("id", "obs_A485", "obsexp_A485")
  
  temp <- full_join(full_join(temp.DMSO, temp.dTAG, by = c("id")), temp.A485, by = c("id")) %>% 
    dplyr::mutate(oeFC_dTAG_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_dTAG/obsexp_DMSO),
                  oeFC_A485_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_A485/obsexp_DMSO))
  
  fwrite(temp, here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv")), sep = "\t")
}


```

##### Async
```{r}
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))

res <- 25

for (res in c(25, 10, 5)){
  
  temp.UT <- fread(here(consensusDir, paste0("loopScore_Async.UT_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.UT) <- c("id", "obs_UT", "obsexp_UT")
  
  temp.AID <- fread(here(consensusDir, paste0("loopScore_Async.AID_", res, "kb.tsv"))) %>%
    dplyr::mutate(binSize = end1 - start1,
                  id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
    dplyr::filter(id %in% loops$id) %>%
    dplyr::select(id, observed, "O/E")
  colnames(temp.AID) <- c("id", "obs_AID", "obsexp_AID")

  
  temp <- full_join(temp.UT, temp.AID, by = c("id")) %>% 
    dplyr::mutate(oeFC_AID_UT = if_else(obsexp_UT == 0, NA, obsexp_AID/obsexp_UT))
  
  fwrite(temp, here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb_async.tsv")), sep = "\t")
}


```

#### Comparing obs/exp from chrommosight loop scores
##### dTAG per res
```{r}
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

for(res in c(25, 10, 5)){
  minValue <- -4
  maxValue <- 5
  obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
  
  
  # dTAG
  obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
  obsexp <- obsexp %>% dplyr::arrange(density)
  g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  
  # Visualize UP DOWN loops
  temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
  
  
  temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
  
  temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
  
  
  fileName <- paste0("obsexp_dTAG_vs_DMSO_", res, "kb")
  height <- 4
  width <- 12
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(plot_grid(g1, g4, g3, g2, ncol = 4))
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(plot_grid(g1, g4, g3, g2, ncol = 4))
  dev.off()
}

```

##### dTAG all res
```{r}
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

minValue <- -4
maxValue <- 5

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))



# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_dTAG_vs_DMSO")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_dTAG_vs_DMSO_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
```
###### Plotting only subset
```{r}

specificLoop.25kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_25kb.bedpe")) %>%
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
specificLoop.10kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_10kb.bedpe")) %>%
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
specificLoop.5kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_5kb.bedpe")) %>%
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

specificLoops.DMSO <- c(specificLoop.25kb$id, specificLoop.10kb$id, specificLoop.5kb$id)


minValue <- -4
maxValue <- 5

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))




# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id,
                                 id %in% specificLoops.DMSO)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id,
                                 id %in% specificLoops.DMSO)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_dTAG_vs_DMSO_str_vs_reg_DMSOspecificLoops")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()

```
###### Plotting 1mb
```{r}
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
maxValue <- 5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>% dplyr::mutate(distance = start2 - start1)


# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

# distance filter
obsexp <- data %>% dplyr::filter(distance > 1e6)


# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_dTAG_vs_DMSO_1mbover")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_dTAG_vs_DMSO_1mbover_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
```

###### Plotting Obs
```{r}
minValue <- -4
maxValue <- 10
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                  log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
                  log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
                  log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))

# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))



# dTAG
obsexp$density <- get_density(obsexp$log_obs_DMSO, obsexp$log_obs_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_dTAG_vs_DMSO_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_dTAG_vs_DMSO_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
```

##### Async.AID all res
```{r}
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

minValue <- -8
maxValue <- 7

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv"))) %>%
  dplyr::mutate(log_obsexp_UT = if_else(obsexp_UT == 0, minValue, log2(obsexp_UT)),
                log_obsexp_AID = if_else(obsexp_AID == 0, minValue, log2(obsexp_AID)))



# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_UT, obsexp$log_obsexp_AID, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_AID_vs_UT")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_AID_vs_UT_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
```

###### Plotting Obs
```{r}

minValue <- -20
maxValue <- 10

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv"))) %>%
  dplyr::mutate(log_obsexp_UT = if_else(obsexp_UT == 0, minValue, log2(obsexp_UT)),
                log_obsexp_AID = if_else(obsexp_AID == 0, minValue, log2(obsexp_AID)),
                log_obs_UT = if_else(obs_UT == 0, minValue, log2(obs_UT)),
                log_obs_AID = if_else(obs_AID == 0, minValue, log2(obs_AID)))

# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))



# dTAG
obsexp$density <- get_density(obsexp$log_obs_UT, obsexp$log_obs_AID, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obs_AID_vs_UT_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obs_AID_vs_UT_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
```

##### A485 per res
```{r}
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

for(res in c(25, 10, 5)){
  minValue <- -4
  maxValue <- 5
  obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
  
  
  # A485
  obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_A485, n = 100)
  obsexp <- obsexp %>% dplyr::arrange(density)
  g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  
  # Visualize UP DOWN loops
  temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  
  temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
  temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
  temp <- temp %>% dplyr::arrange(density)
  g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
    geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
    geom_abline(slope = 1, intercept = 0) +
    ggtitle(paste0(res, "kb, log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 
  
  
  fileName <- paste0("obsexp_A485_vs_DMSO_", res, "kb")
  height <- 4
  width <- 12
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(plot_grid(g1, g4, g3, g2, ncol = 4))
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(plot_grid(g1, g4, g3, g2, ncol = 4))
  dev.off()
}

```
##### A485 all res
```{r}
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))

minValue <- -4
maxValue <- 5

obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))


# A485
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_A485, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_A485_vs_DMSO")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs/exp), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_A485_vs_DMSO_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
```
###### Plotting Obs
```{r}
minValue <- -4
maxValue <- 10
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                  log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
                  log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
                  log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))

# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))



# dTAG
obsexp$density <- get_density(obsexp$log_obs_DMSO, obsexp$log_obs_A485, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs)")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), down")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), no")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), up")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 


fileName <- paste0("obsexp_A485_vs_DMSO_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()


# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), structural loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>% 
  dplyr::mutate(binSize = V3 - V2,
                id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) + 
  geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
  geom_abline(slope = 1, intercept = 0) +
  ggtitle(paste0("log2(obs), regulatory loops")) +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) 

fileName <- paste0("obsexp_A485_vs_DMSO_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
```

### [2.26] Repeating few analysis with obs/exp
#### Function
```{r}
create_dist_vs_avgScore_perTreatment_oe <- function(data, figDir, name, loopList, colorList, se = FALSE){
  avg_scores_long <- data %>%
    group_by(distance, Anno2) %>%
    summarise(avg_score = mean(score, na.rm = TRUE)) %>%
    ungroup() 
  avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) + 
    geom_smooth(show.legend = TRUE, se = se)  +
    #ylim(0, 0.5) +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = colorList) +
    scale_fill_manual(values = colorList) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Score") +
    theme(plot.title = element_text(size = 8))
  
  fileName <- paste0("dist_vs_score_linePlot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 4, height = 3)
  print(p4)
  dev.off()
} 

create_dist_vs_avgDiffScore_perTreatment_oe <- function(data, figDir, name, loopList, colorList, se = FALSE){
  avg_scores_long <- data %>%
    group_by(distance, Anno2) %>%
    summarise(avg_score = mean(score, na.rm = TRUE)) %>%
    ungroup() 
  avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
  p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) + 
    geom_hline(yintercept = 0) +
    geom_smooth(show.legend = TRUE, se = se)  +
    theme_classic() +  scale_x_continuous(labels = label_kb_mb) +
    scale_color_manual(values = colorList) +
    scale_fill_manual(values = colorList) +
    labs(title = paste0(name),
         x = "Distance",
         y = "Average Diff Score") +
    theme(plot.title = element_text(size = 8)) 
  
  fileName <- paste0("dist_vs_score_difflinePlot_", name)
  png(here(figDir, paste0(fileName, ".png")), 
      res = 600, units = "in", width = 4, height = 3)
  print(p4)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 4, height = 3)
  print(p4)
  dev.off()
} 

create_dist_barplot_oe <- function(data, figDir, name, note, loopList, diffCutoff, distanceFilter = 2*e6){
    data <- data %>% 
    dplyr::mutate(distance = start2 - start1,
                  updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% loopList,
                  distance < distanceFilter)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  
  temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
    dplyr::filter(Anno2 %in% loopList,
                  updown_dTAG_DMSO %in% c("UP", "NO", "DOWN"))
  p <- ggplot(temp, aes(x = updown_dTAG_DMSO, y = distance)) +
    geom_violin(aes(fill = updown_dTAG_DMSO)) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = updown_dTAG_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + ggtitle(note) +
    scale_y_continuous(labels = label_kb_mb)
  fileName <- paste0("size_barplot_", name, "_dTAG_vs_DMSO_", note, "_", diffCutoff)
  height <- 3
  width <- 4
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
  
  
  temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
    dplyr::filter(Anno2 %in% loopList,
                  updown_A485_DMSO %in% c("UP", "NO",  "DOWN"))
  p <- ggplot(temp, aes(x = updown_A485_DMSO, y = distance)) +
    geom_violin(aes(fill = updown_A485_DMSO)) + 
    geom_boxplot(width = 0.1, outlier.shape = NA) +
    stat_summary(aes(group = updown_A485_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
    theme_classic() + ggtitle(note) +
    scale_y_continuous(labels = label_kb_mb)
  fileName <- paste0("size_barplot_", name, "_A485_vs_DMSO_", note, "_", diffCutoff)
  height <- 3
  width <- 4
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

create_loop_scatterplot_oe <- function(data, figDir, name, Anno2List, diffCutoff){
  data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% Anno2List)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
  num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
  num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
  num.all <- num.up + num.no + num.down
  perc.up <- round(num.up / num.all * 100, 2)
  perc.no <- round(num.no / num.all * 100, 2)
  perc.down <- round(num.down / num.all * 100, 2)
  
  ### Scatterplot
    minValue <- -4
  maxValue <- 5
  data$density <- get_density(data$log_obsexp_DMSO, data$log_obsexp_dTAG, n = 100)
  data <- data %>% dplyr::arrange(density)
  correlation <- cor(data$log_obsexp_DMSO, data$log_obsexp_dTAG)
  p1 <- ggplot(data, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) +
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
    annotate("text", x = minValue, y = maxValue, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
             color = "black", hjust = 0, size = 3) +
    annotate("text", x = minValue, y = maxValue-1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
             color = "black", hjust = 0, size = 3) +
    annotate("text", x = minValue, y = maxValue-2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
             color = "black", hjust = 0, size = 3) +
    theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5)) +
    annotate("text", x = minValue, y = maxValue, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

  
  
  num.up <- (summary(data$updown_A485_DMSO))["UP"]
  num.no <- (summary(data$updown_A485_DMSO))["NO"]
  num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
  num.all <- num.up + num.no + num.down
  perc.up <- round(num.up / num.all * 100, 2)
  perc.no <- round(num.no / num.all * 100, 2)
  perc.down <- round(num.down / num.all * 100, 2)
  
  data$density <- get_density(data$log_obsexp_DMSO, data$log_obsexp_A485, n = 100)
  data <- data %>% dplyr::arrange(density)
  correlation <- cor(data$log_obsexp_DMSO, data$log_obsexp_A485)
  p2 <- ggplot(data, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
    geom_point(show.legend = FALSE) + 
    scale_color_viridis() +
    coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) +
    geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
    geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
    geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
    geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
    geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
    annotate("text", x = minValue, y = maxValue, label = paste0("UP: ", num.up, " (", perc.up, "%)"), 
             color = "black", hjust = 0, size = 3) +
    annotate("text", x = minValue, y = maxValue-1, label = paste0("NO: ", num.no, " (", perc.no, "%)"), 
             color = "black", hjust = 0, size = 3) +
    annotate("text", x = minValue, y = maxValue-2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"), 
             color = "black", hjust = 0, size = 3) +
    theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5)) +
    annotate("text", x = minValue, y = maxValue, label = paste("r =", round(correlation, 2)), size = 5, color = "black")

  
  
  fileName <- paste0("scatterplot_", name, "_", diffCutoff)
  png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
  
  svglite(here(figDir, paste0(fileName, ".svg")), 
          width = 5*1.5, height = 2.5*1.5)
  print(cowplot::plot_grid(p1, p2, align = "h"))
  dev.off()
}

make_diff_bedpe_oe <- function(data, name, Anno2List, outDir, diffCutoff){
  data <- data %>%
    dplyr::mutate(updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
                  updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP", 
                                            ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
    dplyr::filter(Anno2 %in% Anno2List)
  data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
  data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
  
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_UP_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_NO_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_DOWN_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_UP_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_NO_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
  out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
  fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_DOWN_diff", diffCutoff, ".bedpe")), 
         sep = "\t", col.names = FALSE)
}
```

#### Distance vs score plot
##### dTAG
###### OE
```{r}
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))

# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_DMSO_logOE"), unique(data$Anno2), colorListLoop)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logOE"), unique(data$Anno2), colorListLoop)


# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
                               log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO) %>% 
  dplyr::select(distance, log_obsexp_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logOE"), unique(data$Anno2), colorListLoop)
```

###### OE, grouped
```{r}
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))

temp <- data %>% dplyr::mutate(distance = start2 - start1,
                               log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO) %>% 
  dplyr::select(distance, log_obsexp_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")



temp <- temp %>% dplyr::mutate(Anno3 = case_when(Anno2 %in% c("S-X", "S-S") ~ "Structural",
                                                 Anno2 %in% c("P-P", "P-E", "E-E") ~ "Pure_Regulatory",
                                                 Anno2 %in% c("P-S", "P-X", "E-S", "E-X") ~ "Relaxed_Regulatory")) %>%
  dplyr::filter(!is.na(Anno3))

loopList <- rev(c("Structural", "Relaxed_Regulatory", "Pure_Regulatory"))
colorList <- rev(c(palette_3[["grey2"]], strong_green, darken(strong_green, amount = 0.5)))
avg_scores_long <- temp %>%
  group_by(distance, Anno3) %>%
  summarise(avg_score = mean(score, na.rm = TRUE)) %>%
  ungroup() 
avg_scores_long$Anno3 <- factor(avg_scores_long$Anno3, level = loopList)

p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno3)) + 
  geom_hline(yintercept = 0,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") +
  geom_smooth(show.legend = TRUE, se = TRUE,
              size = lineThick*mmToLineUnit,
              lineend = "square",
              aes(fill = Anno3))  +
  theme_classic() + 
  scale_x_continuous(labels = label_kb_mb) +
  scale_color_manual(values = colorList) +
  scale_fill_manual(values = colorList) +
  labs(x = "Loop size",
       y = "Δ log2(obs/exp)",
       color = "Loop types", fill = "Loop types") +
  theme(
    axis.title = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +
  guides(
    color = guide_legend(
      keywidth = 0.5,  # Adjust the width of the legend color squares
      keyheight = 0.5  # Adjust the height of the legend color squares
    )
  )





fileName <- paste0("dist_vs_score_difflinePlot_grouped", name)

width <- panelSize(2.5)*mmToInch
height <- panelSize(1.25)*mmToInch
png(here(figDir, paste0(fileName, ".png")), 
    res = 600, units = "in", width = width, height = height)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), 
        width = width, height = height)
print(p4)
dev.off()



```

###### Observed
```{r}
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
                  log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
                  log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))

# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obs_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_DMSO_logObs"), unique(data$Anno2), colorListLoop)

temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obs_dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logObs"), unique(data$Anno2), colorListLoop)


# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
                               log_obs_diff_dTAG_DMSO = log_obs_dTAG - log_obs_DMSO) %>% 
  dplyr::select(distance, log_obs_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logObs"), unique(data$Anno2), colorListLoop)
```

##### A485
```{r}
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))

# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_A485_logOE"), unique(data$Anno2), colorListLoop)


# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
                               log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO) %>% 
  dplyr::select(distance, log_obsexp_diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_A485_logOE"), unique(data$Anno2), colorListLoop)
```

#### Get size distribution of differential loops
```{r}
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue = -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
  dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

diffCutoff <- 0.5
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "all"), unique(data$Anno2), diffCutoff)
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "reg"), c("P-P", "P-E", "E-E"), diffCutoff)
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "str"), c("S-S", "S-X"), diffCutoff)



create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "all",  unique(data$Anno2), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "reg",  c("P-P", "P-E", "E-E"), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "reg_1mb",  c("P-P", "P-E", "E-E"), diffCutoff, 1e6)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "str",  c("S-S", "S-X"), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "str_1mb",  c("S-S", "S-X"), diffCutoff, 1e6)

```
#### Splitting loops into diff bedpe
```{r}
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))

# Import obs/exp scores and merge to the dataset
minValue = -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
    dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                  log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                  log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))

# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
  dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
diffCutoff <- 0.5
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "all"), unique(data$Anno2), consensusDir, diffCutoff)
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "pe-pe"), c("P-P", "P-E", "E-E"), consensusDir, diffCutoff)
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "str"), c("S-S", "S-X"), consensusDir, diffCutoff)

```

### [2.27] Extracting TSS for deeptools
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

flankSize <- 1
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")

bed.1 <- gene.tb %>% dplyr::filter(ensembl %in% group1) %>% dplyr::select(c(1, 2, 3))
bed.2 <- gene.tb %>% dplyr::filter(ensembl %in% group2) %>% dplyr::select(c(1, 2, 3))

fwrite(bed.1, here(refDir, "TSS_binaryGroup1.bed"), sep = "\t", col.names = FALSE)
fwrite(bed.2, here(refDir, "TSS_binaryGroup2.bed"), sep = "\t", col.names = FALSE)



gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed"))

bed.1 <- gene.tb %>% dplyr::filter(V6 %in% group1)
bed.2 <- gene.tb %>% dplyr::filter(V6 %in% group2)

fwrite(bed.1, here(refDir, "TSS_binaryGroup1_gtf.bed"), sep = "\t", col.names = FALSE)
fwrite(bed.2, here(refDir, "TSS_binaryGroup2_gtf.bed"), sep = "\t", col.names = FALSE)

```

### [2.28] Checking gene expression levels of binaryGroup
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- fread(here(refDir, "readCount.filtered.TPM.all.tsv")) %>% dplyr::mutate(
  group = case_when(ensembl %in% group1 ~ "group1",
                    ensembl %in% group2 ~ "group2",
                    TRUE ~ NA)
) %>%
  dplyr::filter(!is.na(group)) %>%
  dplyr::select(c(34, 1, 3, 4, 5, 14, 15, 34))

data <- data %>% mutate(average = rowMeans(across(colnames(data)[3:7])))

temp <- data %>% dplyr::select(group, average)


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$average
  distance2 <- (data %>% dplyr::filter(group ==group2) )$average
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pval <- convPvalue(getPvalWilcox(temp, "group1", "group2"))

ggplot(temp, aes(x = group, y = average)) + geom_boxplot() + scale_y_log10() +
  ylab("avgTPM") + theme_classic() +
  annotate("text", x = 1, y = 10000, label = pval)




```

### [2.29] Checking overlap with Stripes
```{r}
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

flankSize <- 1
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")

bed.1 <- gene.tb %>% dplyr::filter(ensembl %in% group1) %>% dplyr::select(c(1, 2, 3))
bed.2 <- gene.tb %>% dplyr::filter(ensembl %in% group2) %>% dplyr::select(c(1, 2, 3))


data <- fread(here("../..", "result", "stripenn", "result_filtered.tsv"))

ggplot(data, aes(x = width)) + geom_histogram() + theme_classic() +
 scale_x_continuous(labels = label_kb_mb)


## Checking overlap with gene TSS

tss <- fread(here(refDir, "mm10_GRCm38.p6_TSS2.5kb.bed"))

tss.group1 <- tss %>% dplyr::filter(V6 %in% group1) %>% dplyr::select(V1, V2, V3)
colnames(tss.group1) <- c("chr", "start", "end")
tss.group1.gr <- makeGRangesFromDataFrame(tss.group1)

tss.group2 <- tss %>% dplyr::filter(V6 %in% group2) %>% dplyr::select(V1, V2, V3)
colnames(tss.group2) <- c("chr", "start", "end")
tss.group2.gr <- makeGRangesFromDataFrame(tss.group2)

## Anchor of stripes
temp <- data %>% dplyr::select(chr, pos1, pos2)
colnames(temp) <- c("chr", "start", "end")

stripeAnchor.gr <- makeGRangesFromDataFrame(temp)

overlap.group1 <- findOverlaps(tss.group1.gr, stripeAnchor.gr)
overlap.group2 <- findOverlaps(tss.group2.gr, stripeAnchor.gr)

length(unique(queryHits(overlap.group1)))
length(unique(queryHits(overlap.group2)))


## body of stripes
temp <- data %>% dplyr::select(chr, pos3, pos4)
colnames(temp) <- c("chr", "start", "end")

stripeBody.gr <- makeGRangesFromDataFrame(temp)

overlap.group1 <- findOverlaps(tss.group1.gr, stripeBody.gr)
overlap.group2 <- findOverlaps(tss.group2.gr, stripeBody.gr)

length(unique(queryHits(overlap.group1)))
length(unique(queryHits(overlap.group2)))

## Checking RAD21/CTCF presence at anchor
overlap.body.ctcf <- findOverlaps(stripeBody.gr, peak.CTCF)
overlap.body.rad21 <- findOverlaps(stripeBody.gr, peak.RAD21)
overlap.anchor.ctcf <- findOverlaps(stripeAnchor.gr, peak.CTCF)
overlap.anchor.rad21 <- findOverlaps(stripeAnchor.gr, peak.RAD21)

length(unique(queryHits(overlap.body.ctcf)))
length(unique(queryHits(overlap.body.rad21)))

length(unique(queryHits(overlap.anchor.ctcf)))
length(unique(queryHits(overlap.anchor.rad21)))
  
## Checking overlap with loop anchor (PE-PE)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)


overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)

length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))


## Checking overlap with loop anchor (ALL)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)


overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)

length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))


## Checking overlap with loop anchor (Structure)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)

overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)

length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))


length(unique(subjectHits(overlap.body.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.body.down)))/length(anchor.down)*100
length(unique(subjectHits(overlap.anchor.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.anchor.down)))/length(anchor.down)*100


## Checking overlap with loop anchor (RelaxedReg)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)

overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)

length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))


length(unique(subjectHits(overlap.body.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.body.down)))/length(anchor.down)*100
length(unique(subjectHits(overlap.anchor.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.anchor.down)))/length(anchor.down)*100




```

### [2.30] Hubs from Dylan Paper
#### All loops
```{r}
results <- tibble(i = numeric(), upno = numeric(), down = numeric())

for(i in seq(1, 10)){
  data <- fread(here(refDir, "Dylan_hub_esc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)

  
  overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
  overlap.down <- findOverlaps(anchor.down, hub.anchor)
  
  n1 <- length(unique(queryHits(overlap.upno)))
  n2 <- length(unique(queryHits(overlap.down)))
  
  perc1 <- round(n1/length(overlap.upno)*100, 2)
  perc2 <- round(n2/length(overlap.down)*100, 2)
  
  results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}



results_long <- results %>%
  pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")

results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))
# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "All loops",
    x = "Presence of hub anchor with >i connections",
    y = "Percentage"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)
```

##### Fishers
```{r}
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.upno <- (bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))

getOverlapLoopNum <- function(loop, peak){
  anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
  anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
  a <- queryHits(findOverlaps(anchor1, peak))
  b <- queryHits(findOverlaps(anchor2, peak))
  return(length(unique(c(a, b))))
}

getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
  all.overlap <- getOverlapLoopNum(allLoop, peak)
  all.notOverlap <- nrow(allLoop) - all.overlap
  
  subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
  subset.notOverlap <- nrow(subsetLoop) - subset.overlap
  
  contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
                                all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
  colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
  rownames(contingency_table) <- c("All loops", "Subset loops")
  
  # Perform Fisher's Exact Test
  fisher_test_result <- fisher.test(contingency_table)
  return(fisher_test_result)
}

name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))


i <- 2
data <- fread(here(refDir, "Dylan_hub_esc.csv"))

data.hub <- data %>% dplyr::filter(all_lcon > i)
temp <- data.hub %>% dplyr::select(c(1, 2, 3))
colnames(temp) <- c("chr", "start", "end")
hub.anchor <- makeGRangesFromDataFrame(temp)


temp <- getSEOverlapFisher(loop, loop.upno, hub.anchor)
result.tb <- tibble(loopType = "UP/NO",
                    target = paste0("hub_", i),
                    pvalue = temp$p.value,
                    oddsRatio = temp$estimate)
temp <- getSEOverlapFisher(loop, loop.down, hub.anchor)
result.tb <- result.tb %>% 
  add_row(loopType = "DOWN",
          target =  paste0("hub_", i),
          pvalue = temp$p.value,
          oddsRatio = temp$estimate)


for(i in c(5)){
  data <- fread(here(refDir, "Dylan_hub_esc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  temp <- getSEOverlapFisher(loop, loop.upno, hub.anchor)
  result.tb <- result.tb %>% add_row(loopType = "UP/NO",
                      target = paste0("hub_", i),
                      pvalue = temp$p.value,
                      oddsRatio = temp$estimate)
  temp <- getSEOverlapFisher(loop, loop.down, hub.anchor)
  result.tb <- result.tb %>% 
    add_row(loopType = "DOWN",
            target =  paste0("hub_", i),
            pvalue = temp$p.value,
            oddsRatio = temp$estimate)
  
  
}

library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
  column_to_rownames(var = "target")

pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
  pivot_wider(names_from = loopType, values_from = pvalue) %>%
  column_to_rownames(var = "target")

# col_fun <- colorRamp2(c(0, 1, 2), 
#                       c("blue", "white", "red"))

data$target <- factor(data$target, levels = c("hub_5", "hub_2"))
data$loopType <- factor(data$loopType, levels = c("UP/NO", "DOWN"))
p <- ggplot(data, aes(x = loopType, y = target, size = -log10(pvalue), fill = oddsRatio)) +
  geom_point(shape = 21,        # Ensures a point with an outline
             stroke = 1*ptToMM      # Line width for the border
  ) + theme_bw() + 
  scale_size_continuous(range = c(1, 3)) +  # Set min and max point sizes here
  scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"),  # Define gradient colors
                      values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5), 
                      #low = "white", high = "#CB333A",
                      #                  limits = c(1, 3),
                      oob = scales::squish, # Define gradient colors
                      guide = guide_colorbar(
                        barwidth = 1.5/5.08,  # Adjust width of the color bar
                        barheight = 15/5.08   # Adjust height of the color bar
                      )
  ) + labs(x = NULL, y = NULL)  +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )


fileName <- here(figDir, "heatmap_hub_enrichment_dotplot_all")
width <- panelSize(1.65)*mmToInch
height <- panelSize(1.1)*mmToInch
# # png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# # print(p)
# # dev.off()
svglite(paste0(fileName, ".svg"),  height = height, width = width)
print(p)
dev.off()
```



#### PE-PE loops
```{r}
results <- tibble(i = numeric(), upno = numeric(), down = numeric())

for(i in seq(1, 10)){
  data <- fread(here(refDir, "Dylan_hub_esc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)


  
  overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
  overlap.down <- findOverlaps(anchor.down, hub.anchor)
  
  n1 <- length(unique(queryHits(overlap.upno)))
  n2 <- length(unique(queryHits(overlap.down)))
  
  perc1 <- round(n1/length(overlap.upno)*100, 2)
  perc2 <- round(n2/length(overlap.down)*100, 2)
  
  results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}



results_long <- results %>%
  pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")
results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))

# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "PE-PE loops",
    x = "Presence of hub anchor with >i connections",
    y = "Percentage"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)
```


#### Str loops
```{r}
results <- tibble(i = numeric(), upno = numeric(), down = numeric())

for(i in seq(1, 10)){
  data <- fread(here(refDir, "Dylan_hub_esc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))

loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)

  
  overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
  overlap.down <- findOverlaps(anchor.down, hub.anchor)
  
  n1 <- length(unique(queryHits(overlap.upno)))
  n2 <- length(unique(queryHits(overlap.down)))
  
  perc1 <- round(n1/length(overlap.upno)*100, 2)
  perc2 <- round(n2/length(overlap.down)*100, 2)
  
  results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}



results_long <- results %>%
  pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")
results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))

# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Structure loops",
    x = "Presence of hub anchor with >i connections",
    y = "Percentage"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)
```
#### TSS
```{r}
results <- tibble(i = numeric(), perc1 = numeric(), perc2 = numeric())

for(i in seq(1, 10)){
  data <- fread(here(refDir, "Dylan_hub_episc.csv"))
  
  data.hub <- data %>% dplyr::filter(all_lcon > i)
  temp <- data.hub %>% dplyr::select(c(1, 2, 3))
  colnames(temp) <- c("chr", "start", "end")
  hub.anchor <- makeGRangesFromDataFrame(temp)
  
  
  
  ## Checking overlap with gene TSS
  tss <- fread(here(refDir, "mm10_GRCm38.p6_TSS2.5kb.bed"))
  
  tss.group1 <- tss %>% dplyr::filter(V6 %in% group1) %>% dplyr::select(V1, V2, V3)
  colnames(tss.group1) <- c("chr", "start", "end")
  tss.group1.gr <- makeGRangesFromDataFrame(tss.group1)
  
  tss.group2 <- tss %>% dplyr::filter(V6 %in% group2) %>% dplyr::select(V1, V2, V3)
  colnames(tss.group2) <- c("chr", "start", "end")
  tss.group2.gr <- makeGRangesFromDataFrame(tss.group2)
  
  
  overlap.group1 <- findOverlaps(tss.group1.gr, hub.anchor)
  overlap.group2 <- findOverlaps(tss.group2.gr, hub.anchor)
  
  n1 <- length(unique(queryHits(overlap.group1)))
  n2 <- length(unique(queryHits(overlap.group2)))
  
  perc1 <- round(n1/nrow(tss.group1)*100, 2)
  perc2 <- round(n2/nrow(tss.group2)*100, 2)
  
  results <- results %>% add_row(i = i, perc1 = perc1, perc2 = perc2)
}



results_long <- results %>%
  pivot_longer(cols = c(perc1, perc2), names_to = "Group", values_to = "Percentage")

# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Percentage by 'i' for perc1 and perc2",
    x = "Presence of hub anchor with >i connections",
    y = "Percentage"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("perc1" = "blue", "perc2" = "red")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

###[2.30] H3K27ac peak number per 10kb
```{r}
process_group <- function(group_name, gene_data, bins) {
  # Filter for the specific group
  TSS_group <- makeGRangesFromDataFrame(
    gene_data %>% filter(ensembl %in% get(group_name)), 
    keep.extra.columns = TRUE
  )
  
  # Process each TSS in the group
  all_results <- map_dfr(seq_len(length(TSS_group)), function(i) {
    # Find overlapping bins
    overlapping_bins <- findOverlaps(TSS_group[i], bins)
    bins_near_tss <- as_tibble(bins[subjectHits(overlapping_bins)])
    
    # Calculate distance bin
    TSScenter <- (as_tibble(TSS_group[i]) %>%
                    mutate(center = (start + end) / 2))$center
    centerBinStart <- floor(TSScenter / 10e3) * 10e3 + 1
    bins_near_tss <- bins_near_tss %>%
      mutate(distanceBin = abs((start - centerBinStart)) / 10e3)
    
    # Summarize results
    result <- bins_near_tss %>%
      group_by(distanceBin) %>%
      summarise(mean_peak_counts = mean(peak_counts, na.rm = TRUE), .groups = "drop") %>%
      mutate(gene = TSS_group[i]$ensembl)
    return(result)
  })
  
  # Calculate mean and SD for the group
  mean_data <- all_results %>%
    group_by(distanceBin) %>%
    summarise(mean_peak_counts = mean(mean_peak_counts, na.rm = TRUE))
  
  sd_data <- all_results %>%
    group_by(distanceBin) %>%
    summarise(sd_peak_counts = sd(mean_peak_counts, na.rm = TRUE))
  
  # Join and add group name
  summary_data <- left_join(mean_data, sd_data, by = "distanceBin") %>%
    mutate(group = group_name)
  
  return(summary_data)
}

##########################################################



## Import peak
refDir <- here("../..", "reference")
#peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K27ac <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
#peak.H3K27ac <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))

## Import 10kb bin
temp <- fread(here(refDir, "mm10.bin.10kb.bed"))
colnames(temp) <- c("chr", "start", "end")
temp <- temp %>% dplyr::mutate(start = start+1)
bins.10kb <- makeGRangesFromDataFrame(temp)

## Count overlap
counts <- countOverlaps(bins.10kb, peak.H3K27ac)
mcols(bins.10kb)$peak_counts <- counts

## Getting TSS
flankSize <- 1e6
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

#TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
#TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)

##########################################################
# List of groups to process
groups <- c("gene.group1", "gene.group2")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2")

# Plot the results
p <- ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 0.5) +
  labs(
    x = "Distance bin (10kb)",
    y = "H3K4me3 peak count"
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS),
    legend.position = c(0.8, 0.8)  # Moves legend inside the plot (x, y relative coordinates)
  ) +
  guides(
    fill = guide_legend(
      keywidth = 0.2,  # Adjust the width of the legend keys
      keyheight = 0.2  # Adjust the height of the legend keys
    ))+
  scale_color_manual(values = rev(c("#777777", "#F28E2C")))

##########################################################

fileName <- paste0("binPeakDensity_RAD21_binaryGroup")
width <- 32*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


```{r}

####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene

# List of groups to process
groups <- c("psOver0", "psOver1", "psOver2", "psOver3", "psOver4")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver0") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver1") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver2") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver3")%>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver4")

# Plot the results
p <- ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 0.5) +
  labs(
    x = "Distance bin (10kb)",
    y = "H3K4me3 peak count"
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS),
    legend.position = c(0.8, 0.8)  # Moves legend inside the plot (x, y relative coordinates)
  ) +
  guides(
    fill = guide_legend(
      keywidth = 0.2,  # Adjust the width of the legend keys
      keyheight = 0.2  # Adjust the height of the legend keys
    ))+
  scale_color_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))

fileName <- paste0("binPeakDensity_RAD21_psGroup11")
width <- 32*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

### PE
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene

# List of groups to process
groups <- c("peOver0", "peOver1", "peOver2", "peOver3", "peOver4")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver0") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver1") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver2") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver3")%>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver4")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) + xlim(0, 10)+
  labs(
    title = "Mean Peak Counts",
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

###############
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene

# List of groups to process
groups <- c("ppOver0", "ppOver1", "ppOver2", "ppOver3", "ppOver4")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver0") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver1") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver2") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver3")%>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver4")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) +
  labs(
    title = "Mean Peak Counts",
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )
```
### [2.31] Testing SEs?
```{r}
peak.Whyte.SE <- importPeak(here(refDir, "superEnhancer_Whyte_ESC_mm10.bed"))
peak.Dylan.SE <- importPeak(here(refDir, "superEnhancer_Dylan_ESC.bed"))



flankSize <- 0.5e6
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)


length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Whyte.SE))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, peak.Whyte.SE))))/length(gene.group2)*100

length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, peak.Dylan.SE))))/length(gene.group2)*100

#View(as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Whyte.SE)))]))
#View(as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE)))]))

gene.group1.SE <- (as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE)))]))$ensembl
gene.group1.noSE <- gene.group1[!(gene.group1 %in% gene.group1.SE)]

overlaps <- findOverlaps(TSS1mb.group2.gr, peak.Dylan.SE)
unique_hits <- unique(queryHits(overlaps))
gene.group2.SE <- as_tibble(TSS1mb.group2.gr[unique_hits])$ensembl
gene.group2.noSE <- as_tibble(TSS1mb.group2.gr[-unique_hits])$ensembl


##########################################################
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - 1e6,
                TSSend = TSS + 1e6) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")

##########################################################
# List of groups to process
groups <- c("gene.group1.SE", "gene.group1.noSE")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.SE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.noSE")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) +
  labs(
    title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

##########################################################
# List of groups to process
groups <- c("gene.group1.SE", "gene.group1.noSE", "gene.group2.SE", "gene.group2.noSE")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
    add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.SE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.noSE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.SE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.noSE")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) +
  labs(
    title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )
##########################################################
# List of groups to process
groups <- c("gene.group2.SE", "gene.group2.noSE")

# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))

# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
  mutate(distanceBin = distanceBin + 1) %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.SE") %>%
  add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.noSE")

# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) + 
  geom_line(size = 1) +
  labs(
    title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
    x = "Distance Bin (10 kb)",
    y = "Mean Peak Counts"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 13)
  )

## Checking P-S in the subset of genes

resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

temp3 <- temp2 %>% dplyr::filter(gene %in% c(gene.group1)) %>%
  dplyr::mutate(group = case_when(gene %in% gene.group1.SE ~ "SE",
                                  TRUE ~ "noSE"))


ggplot(temp3, aes(x = group, y = num_ps)) + geom_violin(aes(fill = group)) + geom_boxplot(width = 0.1) + theme_classic()

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$num_pp
  distance2 <- (data %>% dplyr::filter(group ==group2) )$num_pp
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


getPvalWilcox(temp3, "noSE", "SE")
```

### [2.32] Checking directionality
```{r}
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff)%>% 
  dplyr::mutate(distance = start2 - start1)


geneAnnoData <- geneAnnoData %>% unnest(gene)


gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, TSS)
colnames(gene.tb) <- c("ensembl", "TSS")


geneAnnoData  <- geneAnnoData %>% dplyr::left_join(gene.tb, by = c("gene" = "ensembl"))

data <- geneAnnoData %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 29, 31, 35)) %>%
  rowwise() %>%
  dplyr::mutate(distToAnchor1 = (start1 + end1)/2 - TSS,
                distToAnchor2 = (start2 + end2)/2 - TSS,
                distToAnchor = if_else(abs(distToAnchor1) > abs(distToAnchor2), distToAnchor1, distToAnchor2),
                direction = if_else(distToAnchor > 0, "right", "left"))

data.directionality <- data %>% group_by(gene) %>%
  summarise(count = n(),
            n_right = sum(direction == "right"),
            n_left = sum(direction == "left")) %>%
  dplyr::filter(count > 1) %>%
  dplyr::mutate(n = n_right + n_left,
                directionality = abs((n_right - n_left)/n))


#ggplot(data.directionality, aes(x = directionality)) + geom_histogram() + theme_classic()


gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

temp <- data.directionality %>% dplyr::filter(gene %in% c(gene.group1, gene.group2)) %>%
  dplyr::mutate(group = if_else(gene %in% gene.group1, "group1", "group2"))


ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) + 
  geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-S") +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")


#########################PS

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$directionality
  distance2 <- (data %>% dplyr::filter(group ==group2) )$directionality
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}


resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
#####################

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene

temp <- data.directionality %>% dplyr::filter(gene %in% c(psOver0, psOver1, psOver2, psOver3, psOver4)) %>%
  dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
                                  gene %in% psOver1 ~ "psOver1",
                                  gene %in% psOver2 ~ "psOver2",
                                  gene %in% psOver3 ~ "psOver3",
                                  gene %in% psOver4 ~ "psOver4"))

p34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
p23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
p12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
p01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
p04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)



ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) + 
  geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
                                                "p23: ", convPvalue(p23), "\n",
                                                "p12: ", convPvalue(p12), "\n",
                                                "p01: ", convPvalue(p01), "\n",
                                                "p04: ", convPvalue(p04), "\n"),
           color = "black", hjust = 0, size = 3)


#################### pe

peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene

temp <- data.directionality %>% dplyr::filter(gene %in% c(peOver0, peOver1, peOver2, peOver3, peOver4)) %>%
  dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
                                  gene %in% peOver1 ~ "peOver1",
                                  gene %in% peOver2 ~ "peOver2",
                                  gene %in% peOver3 ~ "peOver3",
                                  gene %in% peOver4 ~ "peOver4"))

p34 <- round(getPvalWilcox(temp, "peOver3", "peOver4"), 5)
p23 <- round(getPvalWilcox(temp, "peOver2", "peOver3"), 5)
p12 <- round(getPvalWilcox(temp, "peOver1", "peOver2"), 5)
p01 <- round(getPvalWilcox(temp, "peOver0", "peOver1"), 5)
p04 <- round(getPvalWilcox(temp, "peOver0", "peOver4"), 5)



ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) + 
  geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
                                                "p23: ", convPvalue(p23), "\n",
                                                "p12: ", convPvalue(p12), "\n",
                                                "p01: ", convPvalue(p01), "\n",
                                                "p04: ", convPvalue(p04), "\n"),
           color = "black", hjust = 0, size = 3)


#################### pp

ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene

temp <- data.directionality %>% dplyr::filter(gene %in% c(ppOver0, ppOver1, ppOver2, ppOver3, ppOver4)) %>%
  dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
                                  gene %in% ppOver1 ~ "ppOver1",
                                  gene %in% ppOver2 ~ "ppOver2",
                                  gene %in% ppOver3 ~ "ppOver3",
                                  gene %in% ppOver4 ~ "ppOver4"))

p34 <- round(getPvalWilcox(temp, "ppOver3", "ppOver4"), 5)
p23 <- round(getPvalWilcox(temp, "ppOver2", "ppOver3"), 5)
p12 <- round(getPvalWilcox(temp, "ppOver1", "ppOver2"), 5)
p01 <- round(getPvalWilcox(temp, "ppOver0", "ppOver1"), 5)
p04 <- round(getPvalWilcox(temp, "ppOver0", "ppOver4"), 5)



ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) + 
  geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
                                                "p23: ", convPvalue(p23), "\n",
                                                "p12: ", convPvalue(p12), "\n",
                                                "p01: ", convPvalue(p01), "\n",
                                                "p04: ", convPvalue(p04), "\n"),
           color = "black", hjust = 0, size = 3)


```

### [2.33] Checking guidance
```{r}
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff)%>% 
  dplyr::mutate(distance = start2 - start1)

geneAnnoData <- geneAnnoData %>% unnest(gene)

gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, TSS)
colnames(gene.tb) <- c("ensembl", "TSS")


geneAnnoData  <- geneAnnoData %>% dplyr::left_join(gene.tb, by = c("gene" = "ensembl"))

data <- geneAnnoData %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 29, 31, 35)) %>%
  rowwise() %>%
  dplyr::mutate(distToAnchor1 = (start1 + end1)/2 - TSS,
                distToAnchor2 = (start2 + end2)/2 - TSS,
                distToAnchor = if_else(abs(distToAnchor1) > abs(distToAnchor2), distToAnchor1, distToAnchor2),
                direction = if_else(distToAnchor > 0, "right", "left"))


result <- data %>%
  group_by(gene, direction) %>%
  mutate(distToAnchor = abs(distToAnchor)) %>%
  mutate(max_dist_P_S = ifelse(any(Anno2 == "P-S"), 
                               max(distToAnchor[Anno2 == "P-S"], na.rm = TRUE), 
                               NA)) %>%
  mutate(is_smaller = ifelse(is.na(max_dist_P_S), NA, 
                             ifelse(distToAnchor == max_dist_P_S, NA, distToAnchor < max_dist_P_S))) %>%
  ungroup()



result2 <- result %>% dplyr::filter(!is.na(is_smaller))

result3 <- result2 %>% group_by(gene, direction) %>%
  summarize(count_true = sum(is_smaller, na.rm = TRUE),
            count_false = sum(is_smaller == FALSE, na.rm = TRUE)) %>%
  dplyr::mutate(count = count_true + count_false,
                perc = count_true/count*100) %>%
  dplyr::select(gene, direction, perc)

result3 <- result3 %>% group_by(gene) %>% summarize(perc = mean(perc))

####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
                                            gene %in% psOver1 ~ "psOver1",
                                            gene %in% psOver2 ~ "psOver2",
                                            gene %in% psOver3 ~ "psOver3",
                                            gene %in% psOver4 ~ "psOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

####### DOing this for P-E groups
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
                                            gene %in% peOver1 ~ "peOver1",
                                            gene %in% peOver2 ~ "peOver2",
                                            gene %in% peOver3 ~ "peOver3",
                                            gene %in% peOver4 ~ "peOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

####### DOing this for P-P groups

ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
                                            gene %in% ppOver1 ~ "ppOver1",
                                            gene %in% ppOver2 ~ "ppOver2",
                                            gene %in% ppOver3 ~ "ppOver3",
                                            gene %in% ppOver4 ~ "ppOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")


```

```{r}

result <- data %>%
  group_by(gene, direction) %>%
  mutate(distToAnchor = abs(distToAnchor)) %>%
  mutate(max_dist_P_S = ifelse(any(Anno2 == "P-S"), 
                               max(distToAnchor[Anno2 == "P-S"], na.rm = TRUE), 
                               NA))
result <- result %>% dplyr::filter(Anno2 != "P-S") %>%
  mutate(is_smaller = ifelse(is.na(max_dist_P_S), NA, 
                             ifelse(distToAnchor == max_dist_P_S, NA, distToAnchor < max_dist_P_S))) %>%
  ungroup()



result2 <- result %>% dplyr::filter(!is.na(is_smaller))

result3 <- result2 %>% group_by(gene, direction) %>%
  summarize(count_true = sum(is_smaller, na.rm = TRUE),
            count_false = sum(is_smaller == FALSE, na.rm = TRUE)) %>%
  dplyr::mutate(count = count_true + count_false,
                perc = count_true/count*100) %>%
  dplyr::select(gene, direction, perc)

result3 <- result3 %>% group_by(gene) %>% summarize(perc = mean(perc))

####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))

psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
                                            gene %in% psOver1 ~ "psOver1",
                                            gene %in% psOver2 ~ "psOver2",
                                            gene %in% psOver3 ~ "psOver3",
                                            gene %in% psOver4 ~ "psOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

####### DOing this for P-E groups
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
                                            gene %in% peOver1 ~ "peOver1",
                                            gene %in% peOver2 ~ "peOver2",
                                            gene %in% peOver3 ~ "peOver3",
                                            gene %in% peOver4 ~ "peOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

####### DOing this for P-P groups

ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene


result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
                                            gene %in% ppOver1 ~ "ppOver1",
                                            gene %in% ppOver2 ~ "ppOver2",
                                            gene %in% ppOver3 ~ "ppOver3",
                                            gene %in% ppOver4 ~ "ppOver4",
                                            TRUE ~ NA)) %>%
  dplyr::filter(!is.na(group))


ggplot(result4, aes(x =  group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
  stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")

```

### [2.34] Checking CTCF motif orientation
#### Calculating CTCF motif
```{r}
peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))

library(BSgenome.Mmusculus.UCSC.mm10)

# Extract sequences
sequences <- getSeq(BSgenome.Mmusculus.UCSC.mm10, peak.CTCF)


# Get the CTCF motif (or provide your own PWM)
ctcf_motif <- query(MotifDb, c("CTCF", "Mmusculus"))[[1]]

# Scan for the motif
motif_hits <- lapply(sequences, function(seq) {
    matchPWM(ctcf_motif, seq, min.score = "80%") # Adjust min.score as needed
})

motif_hits_reverse <- lapply(sequences, function(seq) {
    matchPWM(ctcf_motif, reverseComplement(seq), min.score = "80%")
})

# Combine results
result <- mapply(function(fwd, rev) {
    list(forward = fwd, reverse = rev)
}, motif_hits, motif_hits_reverse, SIMPLIFY = FALSE)


result[[5]]


result_tibble <- tibble(
    peak_id = seq_along(result), # Create an identifier for each peak
    alignment = map_chr(result, function(res) {
        has_fwd <- length(res$forward) > 0
        has_rev <- length(res$reverse) > 0
        
        if (has_fwd && has_rev) "fwdrev"
        else if (has_fwd) "fwd"
        else if (has_rev) "rev"
        else "none"
    })
)

temp <- as_tibble(peak.CTCF) %>% bind_cols(result_tibble) %>%
  dplyr::select(seqnames, start, end, alignment)

fwrite(temp, here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"), col.names = FALSE, sep = "\t")
```
#### Comparing it to TSS
```{r}
ctcf.peak <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcf.peak) <- c("chr", "start", "end", "motif")
ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak, keep.extra.columns = TRUE)


flankSize <- 1
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V4, V5, V6)
colnames(gene.TSS.tb) <- c("chr", "start", "end", "strand", "gene", "ensembl")

tss.gr <- makeGRangesFromDataFrame(gene.TSS.tb, keep.extra.columns = TRUE)

fwd_peaks <- ctcf.peak.gr[ctcf.peak.gr$motif %in% c("fwd", "fwdrev")]
rev_peaks <- ctcf.peak.gr[ctcf.peak.gr$motif %in% c("rev", "fwdrev")]

# Find convergent CTCF pairs for each TSS
convergent_pairs <- lapply(seq_along(tss.gr), function(i) {
    current_tss <- tss.gr[i]
    
    # Filter peaks on the same chromosome as the TSS
    fwd_peaks_chr <- fwd_peaks[as.character(seqnames(fwd_peaks)) == as.character(seqnames(current_tss))]
    rev_peaks_chr <- rev_peaks[as.character(seqnames(rev_peaks)) == as.character(seqnames(current_tss))]
    
    # Find the closest forward peak to the left of the TSS
    left_fwd <- fwd_peaks_chr[start(fwd_peaks_chr) < start(current_tss)]
    closest_fwd <- if (length(left_fwd) > 0) {
      left_fwd[which.max(start(left_fwd))]
    } else {
      NA
    }
    
    # Find the closest reverse peak to the right of the TSS
    right_rev <- rev_peaks_chr[start(rev_peaks_chr) > start(current_tss)]
    closest_rev <- if (length(right_rev) > 0) {
      right_rev[which.min(start(right_rev))]
    } else {
      NA
    }
    
    # Combine results if both closest peaks exist
    if (!is.na(closest_fwd) && !is.na(closest_rev)) {
        tibble(
          chrom1 = as.character(seqnames(closest_fwd)),
          start1 = start(closest_fwd),
          end1 = end(closest_fwd),
          chrom2 = as.character(seqnames(closest_rev)),
          start2 = start(closest_rev),
          end2 = end(closest_rev),
          ensembl = current_tss$ensembl,
        )
    } else {
        NULL
    }
})

# Combine results into a tibble
result_tibble <- bind_rows(convergent_pairs)

fwrite(result_tibble, here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"), sep = "\t", col.names = FALSE)
fwrite(gene.TSS.tb %>% dplyr::select(c(1, 2, 3, 5)), here(refDir, "tss_2bp.bed"), sep = "\t", col.names = FALSE)

temp <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe")) %>% dplyr::filter(V7 == "Klf4")

fwrite(as_tibble(temp), here(refDir, "tss_convergent_ctcf_motif_pairs_klf4.bedpe"), sep = "\t", col.names = FALSE)

# Checking size distribution of ctcf pair
temp <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"))
temp <- temp %>% dplyr::mutate(size = (V6 + V5)/2 - (V3 + V2)/2,
                       id = paste(V1, V2, V3, V5, V6, sep = "_"))
temp <- temp %>% dplyr::select(id, size) %>% distinct()

ggplot(temp, aes(x = size)) + geom_histogram() + scale_x_continuous(labels = label_kb_mb, limits = c(0, 1e6)) + scale_y_log10() + theme_classic() + ggtitle("size distribution of smallest convergent ctcf pairs around gene")
```
#### Filtering loops that fall within CTCF boundaries
```{r}
flankSize <- 1
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V4, V5, V6)
colnames(gene.TSS.tb) <- c("chr", "start", "end", "strand", "gene", "ensembl")
geneEnsemblPair <- gene.TSS.tb %>% dplyr::select(gene, ensembl)

boundary.pair <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"))
colnames(boundary.pair) <- c("boundary_chrom1", "boundary_start1", "boundary_end1", "boundary_chrom2", "boundary_start2", "boundary_end2", "ensembl")
boundary.pair <- boundary.pair %>% dplyr::left_join(geneEnsemblPair, by = c("ensembl"))


# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E")) %>%
  dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 24, 29, 31)) %>% unnest(gene)


data <- geneAnnoData %>% dplyr::left_join(boundary.pair, by = c("gene" = "ensembl"))

data <- data %>% rowwise() %>% dplyr::mutate(isWithinLeftBd = (boundary_start1 <= min(end1, end2)),
                       isWithinRightBd = (boundary_end2 >= max(start1, start2)),
                       isWithinBd = isWithinLeftBd & isWithinRightBd)
data <- data %>% drop_na()


ggplot(data, aes(x = DMSO, y = dTAG)) + geom_point() + coord_fixed() +  
  geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
  geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
  theme_classic() + facet_wrap(~ isWithinBd)

ggplot(data, aes(x = isWithinBd, y = diff_dTAG_DMSO, fill = isWithinBd)) + geom_violin() + geom_boxplot(width = 0.1) + theme_classic() +
  geom_hline(yintercept = 0)


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(isWithinBd ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(isWithinBd ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

getPvalWilcox(data, TRUE, FALSE)


# Proportion of those gene between group1 and group2
data.isWithinBd <- data %>% dplyr::filter(isWithinBd)

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

sum(gene.group1 %in% data.isWithinBd$gene)/length(gene.group1)*100

sum(gene.group2 %in% data.isWithinBd$gene)/length(gene.group2)*100

# Checking size of those loops
data <- data %>% dplyr::mutate(distance = start2 - start1)

ggplot(data, aes(x = isWithinBd, y = distance, fill = isWithinBd)) + geom_violin() + geom_boxplot(width = 0.1) + theme_classic()



```


### [2.35] Checking CTCF motif presence at gene TSS
```{r}
flankSize <- 2.5e3
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)


ctcf.peak <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcf.peak) <- c("chr", "start", "end", "motif")
ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak)


print("+-2.5kb TSS overla with CTCF peak")
length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, ctcf.peak.gr))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, ctcf.peak.gr))))/length(gene.group2)*100



ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak %>% dplyr::filter(motif != "none"))
print("+-2.5kb TSS overla with CTCF peak with motif")
length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, ctcf.peak.gr))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, ctcf.peak.gr))))/length(gene.group2)*100




```

### [2.36] Finding farthest P-S
```{r}
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X")) %>%
  dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 11, 12, 24, 29, 31)) %>% unnest(gene)


gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
  dplyr::select(V6, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "TSS")


geneAnnoData <- geneAnnoData %>% dplyr::left_join(gene.TSS.tb, by = c("gene" = "ensembl"))

geneAnnoData <- geneAnnoData %>% 
  dplyr::mutate(distance1 = (start1 + end1)/2 - TSS,
                distance2 = (start2 + end2)/2 - TSS,
                distance = if_else(abs(distance1) > abs(distance2), distance1, distance2))

result <- geneAnnoData %>%
  filter(Anno2 == "P-S") %>%
  group_by(gene) %>%
  summarise(
    farRight = if (any(distance > 0)) max(distance[distance > 0]) else 0,  # Return 0 if no positive distance
    farLeft = if (any(distance < 0)) min(distance[distance < 0]) else 0  # Return 0 if no negative distance
  ) %>%
  ungroup()

geneAnnoData <- geneAnnoData %>% left_join(result, by = c("gene"))

geneAnnoData <- geneAnnoData %>% dplyr::filter(Anno2 %in% c("P-P", "P-E")) %>%
  dplyr::mutate(group = case_when(is.na(farRight) ~ "No",
                                  (distance > farLeft) & (distance < farRight) ~ "Within",
                                  TRUE ~ "Outside"),
                size = start2 - start1)

data <- geneAnnoData %>% dplyr::select(group, id, size) %>% distinct()
anchor.data <- geneAnnoData %>% dplyr::select(chrom1, start1, end1, chrom2, start2, end2, group) %>% distinct()

temp1 <- anchor.data %>% dplyr::filter(group == "No") %>% dplyr::select(-group)
fwrite(temp1, here(consensusDir, "insulated_domain_ps_no.bedpe"), sep = "\t", col.names = FALSE)
temp2 <- anchor.data %>% dplyr::filter(group == "Within") %>% dplyr::select(-group)
fwrite(temp2, here(consensusDir, "insulated_domain_ps_within.bedpe"), sep = "\t", col.names = FALSE)
temp3 <- anchor.data %>% dplyr::filter(group == "Outside") %>% dplyr::select(-group)
fwrite(temp3, here(consensusDir, "insulated_domain_ps_outside.bedpe"), sep = "\t", col.names = FALSE)


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$size
  distance2 <- (data %>% dplyr::filter(group ==group2) )$size
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))


p <- ggplot(data, aes(x = group, y = size, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
    ) +
  scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Loop size")

fileName <- paste0("insulationBoundary_size")
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
  
########################
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(group ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

data <- geneAnnoData %>% dplyr::select(group, id, diff_dTAG_DMSO) %>% distinct()

# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))

p <- ggplot(data, aes(x = group, y = diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Δ loop score")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  geom_hline(yintercept = -0.2,
             alpha = 0.5, 
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square", linetype = "dashed") +
  coord_cartesian(ylim = c(-0.8, 0.5))
    
fileName <- paste0("insulationBoundary_delta")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  
  
  
 ###################### 
    
    minValue <- -4
  obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))

data <- geneAnnoData %>% dplyr::select(group, id, log_obsexp_diff_dTAG_DMSO) %>% distinct()


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$log_obsexp_diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(group ==group2) )$log_obsexp_diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))
# 

p <- ggplot(data, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "log2(fc of obs/exp)")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  coord_cartesian(ylim = c(-2, 2))
    
fileName <- paste0("insulationBoundary_obsexp")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  





#####################


gene.withinBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "withinBoundary"))$gene)
gene.outsideBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "outsideBoundary"))$gene)
gene.noBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "noBoundary"))$gene)

gene.wBoundary <- unique(c(gene.withinBoundary, gene.outsideBoundary))

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

print("perc of gene with loop within Boundary")
sum(gene.group1 %in% gene.withinBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.withinBoundary)/length(gene.group2)*100

print("perc of gene with loop outside Boundary")
sum(gene.group1 %in% gene.outsideBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.outsideBoundary)/length(gene.group2)*100

print("perc of gene with loop with no Boundary")
sum(gene.group1 %in% gene.noBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.noBoundary)/length(gene.group2)*100


diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name) %>%
  dplyr::mutate(group = case_when(
    ensembl_gene_id %in% gene.wBoundary ~ "withinBoundary",
    ensembl_gene_id %in% gene.noBoundary ~ "noBoundary",
    TRUE ~ NA
  )) %>%
  dplyr::filter(!is.na(group)) %>%
  dplyr::mutate(absLog2FC = abs(log2FoldChange))


ggplot(diff.RNA, aes(x = group, y = log2FoldChange)) + geom_violin() + geom_boxplot(width = 0.1)

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$absLog2FC
  distance2 <- (data %>% dplyr::filter(group ==group2) )$absLog2FC
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
convPvalue(getPvalWilcox(diff.RNA, "withinBoundary", "noBoundary"))


# Create the CDF plot
p <- ggplot(diff.RNA, aes(x = absLog2FC, color = group)) +
stat_ecdf(size = 0.4 ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Absolute log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 0.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )

fileName <- paste0("log2FC_cdf_insulationBoundary")
width <- panelSize(2.5)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()



### Checking characterstics of anchors
temp1 <- anchor.data %>% dplyr::filter(group == "noBoundary") %>% dplyr::select(-group)
anchor.noBoundary <- extractAnchor(temp1)

temp2 <- anchor.data %>% dplyr::filter(group == "withinBoundary") %>% dplyr::select(-group)
anchor.withinBoundary <- extractAnchor(temp2)

temp3 <- anchor.data %>% dplyr::filter(group == "outsideBoundary") %>% dplyr::select(-group)
anchor.outsideBoundary <- extractAnchor(temp3)


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(loop ==group1) )$sumScore
  distance2 <- (data %>% dplyr::filter(loop ==group2) )$sumScore
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

getSumScores <- function(track, anchor) {
  # Find overlaps between all anchors and track regions at once
  overlaps <- findOverlaps(anchor, track)
  
  # Extract the scores and corresponding anchor indices
  anchor_indices <- queryHits(overlaps)
  track_scores <- score(track)[subjectHits(overlaps)]
  
  # Use tapply to calculate the median scores for each anchor
  median_scores <- tapply(track_scores, anchor_indices, mean, na.rm = TRUE)
  
  # Initialize a numeric vector to store the median scores for each anchor
  all_median_scores <- rep(NA, length(anchor))
  
  # Populate the median scores for the anchors that have overlaps
  all_median_scores[as.numeric(names(median_scores))] <- median_scores
  
  return(all_median_scores)
}

plotSumScoresBinary <- function(track, peak, name, anchor1, anchor2, anchor3){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  a <- getSumScores(peakTrack, anchor1)
  b <- getSumScores(peakTrack, anchor2)
  c <- getSumScores(peakTrack, anchor3)
  a.tb <- tibble(loop = "noBoundary",
                 sumScore = a)
  b.tb <- tibble(loop = "withBoundary",
                 sumScore = b)
  c.tb <- tibble(loop = "outsideBoundary",
                 sumScore = c)
  
  data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
  data$loop <- factor(data$loop, levels = c("noBoundary", "withBoundary", "outsideBoundary"))
  
  p12 <- getPvalWilcox(data, "noBoundary", "withBoundary")
  p13 <- getPvalWilcox(data, "noBoundary", "outsideBoundary")
  p23 <- getPvalWilcox(data, "withBoundary", "outsideBoundary")
  p <-  ggplot(data, aes(x = loop, y = sumScore)) + 
    labs(x = NULL, y = paste0(name, "\nlog(avg peak score per anchor)")) +
    geom_violin(aes(fill = loop), color = "black",
                linewidth = lineThick * mmToLineUnit, lineend = "square",
                show.legend = FALSE) +
    geom_boxplot(width = 0.1, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.size = 1, outlier.stroke = NA) + theme_classic() +
    stat_summary(
      aes(group = loop), fun = mean,
      geom = "point", shape = 21, size = 1,
      fill = "red", color = "black"
    ) + theme(
      axis.title = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 45, hjust = 1, vjust = 1
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)
    ) + 
    #coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.99))) + 
    annotate(
      "text", x = 1, y = quantile(data$sumScore, 0.5),
      label = paste0("p12: ", convPvalue(p12), "\n",
                     "p13: ", convPvalue(p13), "\n",
                     "p23: ", convPvalue(p23)),
      color = "black", hjust = 0, size = 3
    ) 
  #+ scale_y_continuous(trans = "log10")
  
  fileName <- paste0("ChIP_peak_avgPeakScore_boundaryAnchor_", name)
  width <- panelSize(2)*mmToInch
  height <- panelSize(2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "H3K4me3", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)

track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
#plotSumScores(track, peak, "H3K27ac")
plotSumScoresBinary(track, peak, "H3K27ac", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)

track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
#plotSumScores(track, peak, "H3K14ac")
plotSumScoresBinary(track, peak, "H3K14ac", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)

track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "CTCF")
plotSumScoresBinary(track, peak, "CTCF", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)

track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "RAD21")
plotSumScoresBinary(track, peak, "RAD21", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)



######## LOLA
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)

# LOADING LOOPS
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])

overlaps <- findOverlaps(anchor.noBoundary, atac.gr)
anchor.noBoundary <- pintersect(anchor.noBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
overlaps <- findOverlaps(anchor.withinBoundary, atac.gr)
anchor.withinBoundary <- pintersect(anchor.withinBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
overlaps <- findOverlaps(anchor.outsideBoundary, atac.gr)
anchor.outsideBoundary <- pintersect(anchor.outsideBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])



anchors <- list(anchor.noBoundary, anchor.withBoundary, anchor.outsideBoundary)
tbs <- list()
temps <- list()

alpha <- 0.05

# Process clusters c1 to c8
for (i in 1:3) {
  
  anchor <- anchors[[i]]
  # Run LOLA
  result <- runLOLA(anchor, anchor.all, lolaDB)
  tb <- as_tibble(result)
  
  # Filter and summarize
  tb <- tb %>%
    dplyr::mutate(target = toupper(antibody)) %>%
    filter(str_to_lower(cellType) == "embryonic stem cell") %>%
    dplyr::filter(qValue < alpha) %>%
    dplyr::group_by(target) %>%
    slice_min(meanRnk, with_ties = FALSE)
  
  # Store tb
  tbs[[i]] <- tb
  
  # Select and rename oddsRatio
  temp <- tb %>% dplyr::select(target, oddsRatio)
  colnames(temp) <- c("target", paste0("OR_c", i))
  
  # Store temp
  temps[[i]] <- temp
}

# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
  mutate_all(~replace_na(., 1))
colnames(temp) <- c("target", "noBoundary", "withinBoundary", "outsideBoundary")
data <- temp %>% column_to_rownames("target") %>% as.matrix()

library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)
```

### [3.37] Checking RAD21 level at tss
```{r}
tss.5kb.group1 <- fread(here(refDir, "TSS_binaryGroup1.bed")) %>%
  dplyr::mutate(start = (V2 + V3)/2 - 2500,
                end = (V2 + V3)/2 + 2500,
                chr = V1) %>%
  dplyr::select(chr, start, end)
tss.5kb.group1 <- makeGRangesFromDataFrame(tss.5kb.group1)

tss.5kb.group2 <- fread(here(refDir, "TSS_binaryGroup2.bed")) %>%
  dplyr::mutate(start = (V2 + V3)/2 - 2500,
                end = (V2 + V3)/2 + 2500,
                chr = V1) %>%
  dplyr::select(chr, start, end)
tss.5kb.group2 <- makeGRangesFromDataFrame(tss.5kb.group2)


plotSumScoresBinary <- function(track, peak, name, anchor1, anchor2){
  peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
  a <- getSumScores(peakTrack, anchor1)
  b <- getSumScores(peakTrack, anchor2)
  a.tb <- tibble(loop = "group1",
                 sumScore = a)
  b.tb <- tibble(loop = "group2",
                 sumScore = b)
  
  data <- bind_rows(a.tb, b.tb) %>% drop_na()
  data$loop <- factor(data$loop, levels = c("group1", "group2"))
  
  p12 <- getPvalWilcox(data, "group1", "group2")
  p <-  ggplot(data, aes(x = loop, y = sumScore)) + 
    labs(x = NULL, y = paste0(name, "\nlog(avg peak score per anchor)")) +
    geom_violin(aes(fill = loop), color = "black",
                linewidth = lineThick * mmToLineUnit, lineend = "square",
                show.legend = FALSE) +
    geom_boxplot(width = 0.1, color = "black",
                 linewidth = lineThick * mmToLineUnit, lineend = "square",
                 outlier.size = 1, outlier.stroke = NA) + theme_classic() +
    stat_summary(
      aes(group = loop), fun = mean,
      geom = "point", shape = 21, size = 1,
      fill = "red", color = "black"
    ) + theme(
      axis.title = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text = element_text(
        size = fontSizeS, family = fontType, color = "#000000"
      ),
      axis.text.x = element_text(
        angle = 45, hjust = 1, vjust = 1
      ),
      axis.line = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      axis.ticks = element_line(
        color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
      ),
      panel.background = element_rect(fill = "transparent"),
      legend.text = element_text(family = fontType, size = fontSizeS),
      legend.title = element_text(family = fontType, size = fontSizeS)
    ) + 
    #coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.99))) + 
    annotate(
      "text", x = 1, y = quantile(data$sumScore, 0.5),
      label = paste0("p12: ", convPvalue(p12)),
      color = "black", hjust = 0, size = 3
    ) 
  #+ scale_y_continuous(trans = "log10")
  
  fileName <- paste0("ChIP_peak_avgPeakScore_binaryGroup_", name)
  width <- panelSize(2)*mmToInch
  height <- panelSize(2)*mmToInch
  png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
  print(p)
  dev.off()
  svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
  print(p)
  dev.off()
}

# Sum peak score
####
track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "CTCF", tss.5kb.group1, tss.5kb.group2)

track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "RAD21", tss.5kb.group1, tss.5kb.group2)



```

### [3.38] Stripe percentage
```{r}
# Create the data
data <- data.frame(
  Group = c("Group1", "Group2"),
  Value = c(31.03, 10.64)
)

# Create the bar plot
p <- ggplot(data, aes(x = Group, y = Value, fill = Group)) + 
  geom_bar(stat = "identity", color = "black", width = 0.7, alpha = 0.6, show.legend = FALSE,
           linewidth = lineThick * mmToLineUnit, lineend = "square") + # Use identity for raw values
  labs(y = "% of TSS on stripe") + 
  theme_classic() +
  theme(
    axis.title = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )

fileName <- paste0("perc_on_stripe_binaryGroup")
width <- panelSize(1.25)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

###[3.39] Splitting bedpe
```{r}
temp <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>% dplyr::select(c(1, 2))
geneList <- (temp %>% dplyr::filter(external_gene_name %in% c("Klf4", "Tbx3", "Jun", "Fosl2", "Myc", "Phlda1")))$ensembl_gene_id

# LOAD ANNOTATED LOOP
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
  dplyr::mutate(distance = start2 - start1)

filteredLoop <- geneAnnoData %>% unnest(gene) %>% dplyr::filter(gene %in% geneList)
fwrite(filteredLoop, here(consensusDir, paste0("P-N_selectedGene.bedpe")), 
         sep = "\t", col.names = FALSE)
  
temploop <- filteredLoop %>% dplyr::filter(Anno2 %in% c("P-P", "P-E"))
fwrite(temploop, here(consensusDir, paste0("P-PE_selectedGene.bedpe")), 
         sep = "\t", col.names = FALSE)
  
temploop <- filteredLoop %>% dplyr::filter(Anno2 %in% c("P-S"))
fwrite(temploop, here(consensusDir, paste0("P-S_selectedGene.bedpe")), 
         sep = "\t", col.names = FALSE)
```

### [3.40] Finding largest encompassing S-S
```{r}
name <- "chromo_cons_annoHierarchy"

# IMPORT S-S loops
allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
strLoops <- allLoops %>% dplyr::filter(Anno2 %in% c("S-S"))
temp <- strLoops %>% dplyr::select(c(1, 2, 6))
colnames(temp) <- c("chrom", "start", "end")
strLoopsGr <- makeGRangesFromDataFrame(temp)

# IMPORT TSS
flankSize <- 10
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chrom", "start", "end", "ensembl", "gene")
geneGr <- makeGRangesFromDataFrame(gene.tb, keep.extra.columns = TRUE)


# Find overlaps between loops and TSS
overlaps <- findOverlaps(geneGr, strLoopsGr)

# Annotate overlaps
tss_with_loops <- geneGr[queryHits(overlaps)]
loops_with_tss <- strLoopsGr[subjectHits(overlaps)]

# Combine into a data frame for processing
loop_data <- data.frame(gene = tss_with_loops$ensembl,
                        loop_chr = seqnames(loops_with_tss),
                        loop_start = start(loops_with_tss),
                        loop_end = end(loops_with_tss),
                        loop_width = width(loops_with_tss))

# Identify the largest loop for each gene
largest_loops <- loop_data[order(loop_data$gene, -loop_data$loop_width), ]
largest_loops <- largest_loops[!duplicated(largest_loops$gene), ]
largest_loops <- largest_loops %>% dplyr::mutate(loopID = paste(loop_chr, loop_start, loop_end, sep = "_"))
largest_loops <- largest_loops[order(largest_loops$loop_chr, largest_loops$loop_start),]
rownames(largest_loops) <- NULL

# Checking smaller loops
allLoopsGr <- makeGRangesFromDataFrame(allLoops, 
                                       seqnames.field = "chrom1", 
                                       start.field = "start1", 
                                       end.field = "end2")
largestLoopsGr <- makeGRangesFromDataFrame(largest_loops, 
                                           seqnames.field = "loop_chr", 
                                           start.field = "loop_start", 
                                           end.field = "loop_end")
largestLoopsGr <- sort(unique(largestLoopsGr))
complete_overlaps <- subsetByOverlaps(allLoopsGr, largestLoopsGr, type = "within")
overlaps <- findOverlaps(complete_overlaps, largestLoopsGr)


completeOverlapDf <- as.data.frame(complete_overlaps) %>% dplyr::mutate(loopIDAll = paste(seqnames, start, end, sep = "_"))
largestLoopsDf <- as.data.frame(largestLoopsGr) %>% dplyr::mutate(loopID = paste(seqnames, start, end, sep = "_"))

loopIDpairs <- data.frame(loopID = completeOverlapDf$loopIDAll[queryHits(overlaps)],
                          loopID2 = largestLoopsDf$loopID[subjectHits(overlaps)])

allLoopsAnnotated <- allLoops %>% dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>% dplyr::left_join(loopIDpairs, by = c("loopID"))

temp <- allLoopsAnnotated %>% dplyr::filter(Anno2 %in% c("P-E", "P-P", "E-E")) %>% dplyr::mutate(hasSS = !is.na(loopID2))
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(hasSS ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(hasSS ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, TRUE, FALSE))
ggplot(temp, aes(x = hasSS, y = diff_dTAG_DMSO)) + geom_violin(aes(fill = hasSS), show.legend = FALSE) + 
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(aes(group = hasSS), fun = mean, 
               geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme_classic() +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
  ggtitle("PE-PE") + geom_hline(yintercept = 0)


# Counting loop types
allLoopsAnnotatedFiltered <- allLoopsAnnotated %>% dplyr::filter(!is.na(loopID2)) %>%
  dplyr::filter(!(loopID == loopID2))
temp <- allLoopsAnnotatedFiltered %>% dplyr::select(loopID2, Anno2)
result <- temp %>%
  group_by(loopID2, Anno2) %>%
  summarise(count = n(), .groups = 'drop') %>%
  group_by(loopID2) %>%
  mutate(frequency = count / sum(count)) %>%
  arrange(loopID2, Anno2)

result <- largest_loops %>% dplyr::left_join(result, by = c("loopID" = "loopID2"))

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(result, by = c("gene"))
data <- data %>% filter(complete.cases(.))

all_anno2 <- unique(data$Anno2)

# Calculate average frequency of Anno2 for each gene in each group
result <- data %>%
  # Ensure all possible Anno2 values are present for each gene and group
  complete(gene, Anno2 = all_anno2, fill = list(frequency = 0)) %>% 
  dplyr::mutate(group = case_when(
    gene %in% gene.group1 ~ "group1",
    gene %in% gene.group2 ~ "group2",
    TRUE ~ NA
  )) %>%
  dplyr::select(group, Anno2, frequency)
  

ggplot(result, aes(x = Anno2, fill = group, y = frequency)) + geom_boxplot()

```


```{r}
# View results
ggplot(largest_loops, aes(x = loop_width)) + geom_histogram()


### Checking how many genes from each group has encompassing S-S
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
  hasSS = ifelse(is.na(loop_width), "NO", "YES")
)

data_summary <- data %>%
  group_by(group) %>%
  summarize(
    total = n(),
    hasSS_yes = sum(hasSS == "YES"),
    percentage_yes = (hasSS_yes / total) * 100
  )

ggplot(data_summary, aes(x = group, y = percentage_yes, fill = group)) +
  geom_bar(stat = "identity", color = "black") +
  labs(
    title = "Percentage of Each Group with hasSS = YES",
    x = "Group",
    y = "Percentage (%)"
  ) + ylim(0, 100) +
  theme_minimal()


temp <- data %>% dplyr::filter(hasSS == "YES")
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$loop_width
  distance2 <- (data %>% dplyr::filter(group ==group2) )$loop_width
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, "group1", "group2"))

ggplot(temp, aes(x = group, y = loop_width)) + geom_violin() + geom_boxplot(width = 0.5) +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black")

### Comparing RNA perturbation

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(c(1, 4, 5)) %>%
  dplyr::mutate(hasSS = ifelse(ensembl_gene_id %in% largest_loops$gene, "hasSS", "noSS"))


ggplot(diff.RNA, aes(x = abs(log2FoldChange), color = hasSS)) +
  stat_ecdf(size = 0.4) +
  labs(
    x = "Absolute log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )


### Checking ChIP peak density
largest_loopsGr <- makeGRangesFromDataFrame(largest_loops, 
                                            seqnames.field = "loop_chr", 
                                            start.field = "loop_start", 
                                            end.field = "loop_end", 
                                            keep.extra.columns = TRUE)

# Find overlaps between ChIP-seq peaks and the largest loops
chip_overlaps <- findOverlaps(largest_loopsGr, peak.CTCF)

# Count the number of ChIP-seq peaks per loop
loop_peak_counts <- table(queryHits(chip_overlaps))

# Create a data frame with the counts and loop details
largest_loops$density <- 0  # Initialize density column
largest_loops$count <- 0    # Initialize peak count column

# Add peak counts to the corresponding loops
largest_loops[as.numeric(names(loop_peak_counts)), "count"] <- as.integer(loop_peak_counts)

# Calculate the density (peaks per kilobase)
largest_loops$density <- largest_loops$count / ((largest_loops$loop_width-1) / 1000)


data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
  hasSS = ifelse(is.na(loop_width), "NO", "YES")
) %>% dplyr::filter(hasSS == "YES")

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$density
  distance2 <- (data %>% dplyr::filter(group ==group2) )$density
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv <- convPvalue(getPvalWilcox(data, "group1", "group2"))

ggplot(data, aes(x = group, y = density)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(aes(group = group), fun = mean, 
               geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme_classic() +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
  ggtitle("CTCF")


```

### [3.41] Finding largest convergent encompassing S-S
#### Filtering convergent S-S loops
```{r}
name <- "chromo_cons_annoHierarchy"

# IMPORT S-S LOOPS
allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
strLoops <- allLoops %>% dplyr::filter(Anno2 %in% c("S-S"))
temp <- strLoops %>% dplyr::select(c(1, 2, 3, 5, 6))  # Assuming columns include anchor positions
colnames(temp) <- c("chrom", "start1", "end1", "start2", "end2")
strLoopsGr1 <- makeGRangesFromDataFrame(
  temp %>% dplyr::select(chrom, start = start1, end = end1)
)
strLoopsGr2 <- makeGRangesFromDataFrame(
  temp %>% dplyr::select(chrom, start = start2, end = end2)
)

# IMPORT CTCF MOTIF ANNOTATION
ctcfMotifs <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcfMotifs) <- c("chrom", "start", "end", "motif")
ctcfGr <- makeGRangesFromDataFrame(ctcfMotifs, keep.extra.columns = TRUE)

# Annotate each loop anchor with CTCF motifs
anchor1_with_ctcf <- findOverlaps(strLoopsGr1, ctcfGr)
anchor2_with_ctcf <- findOverlaps(strLoopsGr2, ctcfGr)

# Get motif information for anchor1 overlaps
anchor1_ctcf_motif <- rep(NA, length(strLoopsGr1))  # Initialize motif vector
anchor1_ctcf_motif[queryHits(anchor1_with_ctcf)] <- ctcfGr[subjectHits(anchor1_with_ctcf)]$motif

# Get motif information for anchor2 overlaps
anchor2_ctcf_motif <- rep(NA, length(strLoopsGr2))  # Initialize motif vector
anchor2_ctcf_motif[queryHits(anchor2_with_ctcf)] <- ctcfGr[subjectHits(anchor2_with_ctcf)]$motif

# Add motif information to strLoops
strLoops <- strLoops %>%
  mutate(
    anchor1_motif = anchor1_ctcf_motif,
    anchor2_motif = anchor2_ctcf_motif
  )

# Filter for convergent CTCF interactions (forward in anchor1, reverse in anchor2)
convergent_loops <- strLoops %>%
  filter(anchor1_motif == "fwd" & anchor2_motif == "rev") %>% dplyr::filter(Anno2 %in% c("S-S"))
```

#### Processing
```{r}
# Extract loop boundary from left to right
temp <- convergent_loops %>% dplyr::select(c(1, 2, 6))
colnames(temp) <- c("chrom", "start", "end")
strLoopsGr <- makeGRangesFromDataFrame(temp)

# IMPORT TSS
flankSize <- 10
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chrom", "start", "end", "ensembl", "gene")
geneGr <- makeGRangesFromDataFrame(gene.tb, keep.extra.columns = TRUE)


# Find overlaps between loops and TSS
overlaps <- findOverlaps(geneGr, strLoopsGr)

# Annotate overlaps
tss_with_loops <- geneGr[queryHits(overlaps)]
loops_with_tss <- strLoopsGr[subjectHits(overlaps)]

# Combine into a data frame for processing
loop_data <- data.frame(gene = tss_with_loops$ensembl,
                        loop_chr = seqnames(loops_with_tss),
                        loop_start = start(loops_with_tss),
                        loop_end = end(loops_with_tss),
                        loop_width = width(loops_with_tss))

# Identify the largest loop for each gene
largest_loops <- loop_data[order(loop_data$gene, -loop_data$loop_width), ]
largest_loops <- largest_loops[!duplicated(largest_loops$gene), ]
largest_loops <- largest_loops %>% dplyr::mutate(loopID = paste(loop_chr, loop_start, loop_end, sep = "_"))
largest_loops <- largest_loops[order(largest_loops$loop_chr, largest_loops$loop_start),]
rownames(largest_loops) <- NULL

# Checking smaller loops
allLoopsGr <- makeGRangesFromDataFrame(allLoops, 
                                       seqnames.field = "chrom1", 
                                       start.field = "start1", 
                                       end.field = "end2")
largestLoopsGr <- makeGRangesFromDataFrame(largest_loops, 
                                           seqnames.field = "loop_chr", 
                                           start.field = "loop_start", 
                                           end.field = "loop_end")
largestLoopsGr <- sort(unique(largestLoopsGr))
complete_overlaps <- subsetByOverlaps(allLoopsGr, largestLoopsGr, type = "within")
overlaps <- findOverlaps(complete_overlaps, largestLoopsGr)


completeOverlapDf <- as.data.frame(complete_overlaps) %>% dplyr::mutate(loopIDAll = paste(seqnames, start, end, sep = "_"))
largestLoopsDf <- as.data.frame(largestLoopsGr) %>% dplyr::mutate(loopID = paste(seqnames, start, end, sep = "_"))

loopIDpairs <- data.frame(loopID = completeOverlapDf$loopIDAll[queryHits(overlaps)],
                          loopID2 = largestLoopsDf$loopID[subjectHits(overlaps)])

# allLoopsAnnotated <- allLoops %>% dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>% dplyr::left_join(loopIDpairs, by = c("loopID"))


##################################
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = diffCutoff,
                                      annoList = c("P-P", "P-E", "P-S", "P-X")) %>% unnest(gene) %>%
  dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_"))
temp <- geneAnnoData %>% dplyr::left_join(loopIDpairs, by = c("loopID"))
temp2 <- temp %>% dplyr::select(gene, loopID2) %>%
  group_by(gene) %>%              # Group data by the 'gene' column
  summarize(geneHasSS = !all(is.na(loopID2)))  # Check if all loopID2 values for each gene are NA
temp <- temp %>% left_join(temp2, by = c("gene"))
data.all <- temp %>% dplyr::mutate(group = ifelse(!geneHasSS, "No",
                                              ifelse(is.na(loopID2), "Outside", "Within")),
                               size = start2 - start1)

##################################
outfile <- data.all %>% dplyr::select(loopID2, gene) %>% dplyr::filter(!is.na(loopID2)) %>%
  separate(loopID2, into = c("chrom", "start", "end"), sep = "_", convert = TRUE)
# fwrite(outfile, here(consensusDir, "convergent_ss_domain.bed"), sep = "\t", col.names = FALSE)
```

#### Plotting
```{r}
data <- data.all %>% dplyr::filter(Anno2 %in% c("P-E"))

temp <- data %>% dplyr::select(chrom1, start1, end1, chrom2, start2, end2, group) %>% distinct()

temp1 <- temp %>% dplyr::filter(group == "No") %>% dplyr::select(-group)
fwrite(temp1, here(consensusDir, "insulated_domain_ss_no.bedpe"), sep = "\t", col.names = FALSE)
temp2 <- temp %>% dplyr::filter(group == "Within") %>% dplyr::select(-group)
fwrite(temp2, here(consensusDir, "insulated_domain_ss_within.bedpe"), sep = "\t", col.names = FALSE)
temp3 <- temp %>% dplyr::filter(group == "Outside") %>% dplyr::select(-group)
fwrite(temp3, here(consensusDir, "insulated_domain_ss_outside.bedpe"), sep = "\t", col.names = FALSE)

temp <- data %>% dplyr::select(id, group, size) %>% distinct()


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$size
  distance2 <- (data %>% dplyr::filter(group ==group2) )$size
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))


p <- ggplot(temp, aes(x = group, y = size, fill = group)) + 
  geom_violin(linewidth = lineMedium* mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
                 linewidth = lineMedium * mmToLineUnit, lineend = "square",
                 outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + scale_y_continuous(labels = label_kb_mb) +
  scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
  stat_summary(
      aes(group = group), fun = mean,
      geom = "point", shape = 21, size = 0.5,
      fill = "black", color = "black", position = position_dodge(.3)
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Loop size")

fileName <- paste0("insulationBoundary_size_convSS_PE")
width <-33*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(group ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

temp <- data %>% dplyr::select(group, id, diff_dTAG_DMSO) %>% distinct()

convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))

p <- ggplot(data, aes(x = group, y = diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() +   
    scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Δ loop score") +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  geom_hline(yintercept = -0.2,
             alpha = 0.5, 
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square", linetype = "dashed") +
  coord_cartesian(ylim = c(-0.8, 0.5))
    
fileName <- paste0("insulationBoundary_delta_convSS_pe")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  


###################### 

minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

data2 <- data %>% left_join(obsexp, by = c("id"))

temp <- data2 %>% dplyr::select(group, id, log_obsexp_diff_dTAG_DMSO) %>% distinct()


getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$log_obsexp_diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(group ==group2) )$log_obsexp_diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))


p <- ggplot(temp, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineMedium* mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
  
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "log2(fc of obs/exp)") +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  coord_cartesian(ylim = c(-2, 2))
    width <-30*mmToInch
height <- 35*mmToInch
fileName <- paste0("insulationBoundary_obsexp_convSS_pe")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  





#####################

data <- data.all
gene.withinBoundary <- unique((data %>% dplyr::filter(group == "Within"))$gene)
gene.outsideBoundary <- unique((data %>% dplyr::filter(group == "Outside"))$gene)
gene.noBoundary <- unique((data %>% dplyr::filter(group == "No"))$gene)

gene.wBoundary <- unique(c(gene.withinBoundary, gene.outsideBoundary))

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

print("perc of gene with loop within Boundary")
sum(gene.group1 %in% gene.withinBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.withinBoundary)/length(gene.group2)*100

print("perc of gene with loop outside Boundary")
sum(gene.group1 %in% gene.outsideBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.outsideBoundary)/length(gene.group2)*100

print("perc of gene with loop with no Boundary")
sum(gene.group1 %in% gene.noBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.noBoundary)/length(gene.group2)*100


diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name) %>%
  dplyr::mutate(group = case_when(
    ensembl_gene_id %in% gene.wBoundary ~ "withinBoundary",
    ensembl_gene_id %in% gene.noBoundary ~ "noBoundary",
    TRUE ~ NA
  )) %>%
  dplyr::filter(!is.na(group)) %>%
  dplyr::mutate(absLog2FC = abs(log2FoldChange))
ks_result <- ks.test(
  diff.RNA %>% dplyr::filter(group == "withinBoundary") %>% pull(absLog2FC),
  diff.RNA %>% dplyr::filter(group == "noBoundary") %>% pull(absLog2FC)
)

ggplot(diff.RNA, aes(x = group, y = log2FoldChange)) + geom_violin() + geom_boxplot(width = 0.1)

# getPvalWilcox <- function(data, group1, group2){
#   distance1 <- (data %>% dplyr::filter(group ==group1) )$absLog2FC
#   distance2 <- (data %>% dplyr::filter(group ==group2) )$absLog2FC
#   wil <- wilcox.test(distance1, distance2)
#   return(wil$p.value)
# }
# convPvalue(getPvalWilcox(diff.RNA, "withBoundary", "noBoundary"))
# 

# Create the CDF plot
p <- ggplot(diff.RNA, aes(x = absLog2FC, color = group)) +
  scale_color_manual(values = (c("#777777", "#F28E2C"))) +

stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square"  ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )

fileName <- paste0("log2FC_cdf_insulationBoundary_convSS_pe")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```


```{r}
temp <- allLoopsAnnotated %>% dplyr::filter(Anno2 %in% c("P-E")) %>% dplyr::mutate(hasSS = !is.na(loopID2))
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(hasSS ==group1) )$diff_dTAG_DMSO
  distance2 <- (data %>% dplyr::filter(hasSS ==group2) )$diff_dTAG_DMSO
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, TRUE, FALSE))
ggplot(temp, aes(x = hasSS, y = diff_dTAG_DMSO)) + geom_violin(aes(fill = hasSS), show.legend = FALSE) + 
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(aes(group = hasSS), fun = mean, 
               geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme_classic() +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
  ggtitle("PE") + geom_hline(yintercept = 0)




# Counting loop types
allLoopsAnnotatedFiltered <- allLoopsAnnotated %>% dplyr::filter(!is.na(loopID2)) %>%
  dplyr::filter(!(loopID == loopID2))
temp <- allLoopsAnnotatedFiltered %>% dplyr::select(loopID2, Anno2)
result <- temp %>%
  group_by(loopID2, Anno2) %>%
  summarise(count = n(), .groups = 'drop') %>%
  group_by(loopID2) %>%
  mutate(frequency = count / sum(count)) %>%
  arrange(loopID2, Anno2)

result <- largest_loops %>% dplyr::left_join(result, by = c("loopID" = "loopID2"))

gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(result, by = c("gene"))
data <- data %>% filter(complete.cases(.))

all_anno2 <- unique(data$Anno2)

# Calculate average frequency of Anno2 for each gene in each group
result <- data %>%
  # Ensure all possible Anno2 values are present for each gene and group
  complete(gene, Anno2 = all_anno2, fill = list(frequency = 0)) %>% 
  dplyr::mutate(group = case_when(
    gene %in% gene.group1 ~ "group1",
    gene %in% gene.group2 ~ "group2",
    TRUE ~ NA
  )) %>%
  dplyr::select(group, Anno2, frequency)
  

ggplot(result, aes(x = Anno2, fill = group, y = frequency)) + geom_boxplot()



# View results
ggplot(largest_loops, aes(x = loop_width)) + geom_histogram()


### Checking how many genes from each group has encompassing S-S
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
  hasSS = ifelse(is.na(loop_width), "NO", "YES")
)

data_summary <- data %>%
  group_by(group) %>%
  summarize(
    total = n(),
    hasSS_yes = sum(hasSS == "YES"),
    percentage_yes = (hasSS_yes / total) * 100
  )

ggplot(data_summary, aes(x = group, y = percentage_yes, fill = group)) +
  geom_bar(stat = "identity", color = "black") +
  labs(
    title = "Percentage of Each Group with hasSS = YES",
    x = "Group",
    y = "Percentage (%)"
  ) + ylim(0, 100) +
  theme_minimal()

temp <- data %>% dplyr::filter(hasSS == "YES")
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$loop_width
  distance2 <- (data %>% dplyr::filter(group ==group2) )$loop_width
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, "group1", "group2"))

ggplot(temp, aes(x = group, y = loop_width)) + geom_violin() + geom_boxplot(width = 0.5) +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black")


### Comparing RNA perturbation

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(c(1, 4, 5)) %>%
  dplyr::mutate(hasSS = ifelse(ensembl_gene_id %in% largest_loops$gene, "hasSS", "noSS"))


ggplot(diff.RNA, aes(x = abs(log2FoldChange), color = hasSS)) +
  stat_ecdf(size = 0.4) +
  labs(
    x = "Absolute log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    )



### Checking ChIP peak density
largest_loopsGr <- makeGRangesFromDataFrame(largest_loops, 
                                            seqnames.field = "loop_chr", 
                                            start.field = "loop_start", 
                                            end.field = "loop_end", 
                                            keep.extra.columns = TRUE)

# Find overlaps between ChIP-seq peaks and the largest loops
chip_overlaps <- findOverlaps(largest_loopsGr, peak.H3K4me3)

# Count the number of ChIP-seq peaks per loop
loop_peak_counts <- table(queryHits(chip_overlaps))

# Create a data frame with the counts and loop details
largest_loops$density <- 0  # Initialize density column
largest_loops$count <- 0    # Initialize peak count column

# Add peak counts to the corresponding loops
largest_loops[as.numeric(names(loop_peak_counts)), "count"] <- as.integer(loop_peak_counts)

# Calculate the density (peaks per kilobase)
largest_loops$density <- largest_loops$count / ((largest_loops$loop_width-1) / 1000)


data <- tibble(
  gene = c(gene.group1, gene.group2),
  group = c(rep("group1", length(gene.group1)),
            rep("group2", length(gene.group2)))
)

data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
  hasSS = ifelse(is.na(loop_width), "NO", "YES")
) %>% dplyr::filter(hasSS == "YES")

getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(group ==group1) )$density
  distance2 <- (data %>% dplyr::filter(group ==group2) )$density
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

pv <- convPvalue(getPvalWilcox(data, "group1", "group2"))

ggplot(data, aes(x = group, y = density)) + geom_violin(aes(fill = group), show.legend = FALSE) + 
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(aes(group = group), fun = mean, 
               geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
  theme_classic() +
  annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
  ggtitle("H3K4me3")


```

### [3.42] Comparing to Reg loops to A485
```{r}
name <- "chromo_cons_annoHierarchy"
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
  dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
  dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))
# UP NO
loop.upno <- bind_rows(loop.up, loop.no)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
  dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))


allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>%
  dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>%
  dplyr::mutate(group = case_when(
    loopID %in% loop.upno$loopID ~ "UP&NO",
    loopID %in% loop.down$loopID ~ "DOWN",
    TRUE ~ NA
  )) %>%
  dplyr::filter(!is.na(group))

minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
  dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
                log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
                log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
                log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
                log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)

allLoops <- allLoops %>% left_join(obsexp, by = c("id"))
allLoops$group <- factor(allLoops$group, levels = c("UP&NO", "DOWN"))


### Plotting
p1 <- ggplot(allLoops, aes(x = group, y = diff_A485_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "grey50")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Δ loop score")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  geom_hline(yintercept = -0.2,
             alpha = 0.5, 
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square", linetype = "dashed") +
  coord_cartesian(ylim = c(-0.5, 0.5))
    

####
p2 <- ggplot(allLoops, aes(x = group, y = log_obsexp_diff_A485_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "grey50")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "log2(fc of obs/exp)")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  coord_cartesian(ylim = c(-2, 2))
  


#############

### Plotting
p3 <- ggplot(allLoops, aes(x = group, y = diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "grey50")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "Δ loop score")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  geom_hline(yintercept = -0.2,
             alpha = 0.5, 
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square", linetype = "dashed") +
  coord_cartesian(ylim = c(-0.5, 0.5))
    

####
p4 <- ggplot(allLoops, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) + 
  geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) + 
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineThick * mmToLineUnit, lineend = "square",
               outlier.shape = NA,  alpha = 0.6, show.legend = FALSE) + 
  theme_classic() + 
  stat_summary(
    aes(group = group), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black", position = position_dodge(.3)
  ) +
  scale_fill_manual(values = c("grey50", "grey50")) +

  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),    
    axis.text.x = element_text(
      size = fontSizeM,
      angle = 45,      # Rotate x-axis labels 45 degrees
      hjust = 1,       # Adjust horizontal justification
      vjust = 1        # Adjust vertical justification
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +labs(y = "log2(fc of obs/exp)")  +    
  geom_hline(yintercept = 0,
             alpha = 1,
             color = "black",
             size = lineThick*mmToLineUnit,
             lineend = "square") + 
  coord_cartesian(ylim = c(-2, 2))
  
width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_dTAG")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p1, p2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(p1, p2))
dev.off()  

width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_A485")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p3, p4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(p3, p4))
dev.off()  

width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_loopScore")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p1, p3))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(p1, p3))
dev.off()  

width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_log2fc")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p2, p4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(plot_grid(p2, p4))
dev.off()  

```

### [3.43] Checking RAD21 peak density at group1 and group2 promoter
```{r}

peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))

##########################################################
flankSize <- 5000
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
  dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
                TSSstart = TSS - flankSize,
                TSSend = TSS + flankSize) %>%
  dplyr::select(V6, V5, V1, TSSstart, TSSend)
colnames(gene.tb) <- c("ensembl_gene_id", "external_gene_name", "chr", "start", "end")


# Convert gene.tb to a GRanges object
gene_gr <- GRanges(
  seqnames = gene.tb$chr,
  ranges = IRanges(start = gene.tb$start, end = gene.tb$end),
  gene_id = gene.tb$ensembl_gene_id
)

# Count overlaps between peaks and genes
overlap_counts <- countOverlaps(gene_gr, peak.RAD21)

# Add overlap counts to the original gene.tb data
gene.tb$peak_count <- overlap_counts

###############################################################################

peakNum0 <- (gene.tb %>% dplyr::filter(peak_count == 0))$ensembl_gene_id
peakNum1 <- (gene.tb %>% dplyr::filter(peak_count == 1))$ensembl_gene_id
peakNum2 <- (gene.tb %>% dplyr::filter(peak_count == 2))$ensembl_gene_id
peakNum3 <- (gene.tb %>% dplyr::filter(peak_count == 3))$ensembl_gene_id
peakNumOver4 <- (gene.tb %>% dplyr::filter(peak_count >= 4))$ensembl_gene_id


###############################################################################
name <- "chromo_cons_annoHierarchy"

alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::filter(padj < alpha)
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-P", "P-E", "P-S", "P-X"))%>% 
  dplyr::mutate(distance = start2 - start1,
                peakID = paste(chrom1, start1, start2, sep = "_"))


## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>% 
  unnest(gene) %>% group_by(gene) %>%
  summarize(mean_diff_score = mean(diff_dTAG_DMSO),
            mean_distance = mean(distance),
            .groups = 'drop')

diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
  dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)

maxLog2FC = 2

temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>% 
  dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
                maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
                log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>% 
  dplyr::arrange(flag) %>%
  drop_na(shrinked_log2FC)

temp <- temp %>% dplyr::mutate(
  pnOver = ifelse(gene %in% peakNum0, "peakNum0",
                  ifelse(gene %in% peakNum1, "peakNum1",
                         ifelse(gene %in% peakNum2, "peakNum2",
                                ifelse(gene %in% peakNum3, "peakNum3",
                                       ifelse(gene %in% peakNumOver4, "peakNumOver4", NA)))))) %>%
  drop_na(pnOver)

###############################################################################
# Checking percentage of genes with RAD21 peaks
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene

aaa <- temp %>% dplyr::mutate(group = case_when(
  gene %in% group1 ~ "group1",
  gene %in% group2 ~ "group2",
  TRUE ~ NA
)) %>%
  dplyr::select(group, pnOver, gene) %>%
  dplyr::filter(group %in% c("group1", "group2")) %>%
  dplyr::mutate(hasPeak = ifelse(pnOver == "peakNum0", FALSE, TRUE))

bbb <- aaa %>% dplyr::filter(group == "group1") 

sum(bbb$hasPeak)
bbb <- aaa %>% dplyr::filter(group == "group2") 
sum(bbb$hasPeak)

aaa_summary <- aaa %>%
  group_by(group, pnOver) %>%
  summarize(count = n(), .groups = "drop")

# Create the stacked barplot
aaa_summary <- aaa %>%
  group_by(group, pnOver) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(group) %>%
  mutate(ratio = count / sum(count))

# Define the gradient colors
gradient_colors <- c("#D4D4D4", "#DCB0AF", "#E48D8A", "#EC6965", "#F44641")
# gradient_colors <- c("#D4D4D4", "#A2BBCA", "#71A2C0", "#3F89B7", "#0E71AD")

# Create the stacked barplot with gradient colors
p <- ggplot(aaa_summary, aes(x = group, y = ratio, fill = pnOver)) +
  geom_bar(stat = "identity", position = "stack",
           linewidth = lineMedium * mmToLineUnit, lineend = "square") +
  theme_classic() + labs(x = NULL , y = "Ratio") +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +
  scale_fill_manual(values = gradient_colors, 
                    guide = guide_legend(
      override.aes = list(size = 0.5), # Adjust legend key symbol size
      keywidth = 2 / 2.54, # Convert 2mm to cm
      keyheight = 2 / 2.54 # Convert 2mm to cm
    )) 

width <- panelSize(1.7)*mmToInch
height <- panelSize(1.3)*mmToInch
fileName <- paste0("rad21perc_promoter")
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  

```

##### loop score
```{r}
getPvalWilcox <- function(data, group1, group2){
  distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
  distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
  wil <- wilcox.test(distance1, distance2)
  return(wil$p.value)
}

ps01 <- round(getPvalWilcox(temp, "peakNum0", "peakNum1"), 5)
ps12 <- round(getPvalWilcox(temp, "peakNum1", "peakNum2"), 5)
ps23 <- round(getPvalWilcox(temp, "peakNum2", "peakNum3"), 5)
ps34 <- round(getPvalWilcox(temp, "peakNum3", "peakNumOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "peakNum2", "peakNumOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "peakNum1", "peakNumOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "peakNum0", "peakNumOver4"), 5)


p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) + 
  geom_violin(aes(fill = pnOver), 
              color = "black",
              linewidth = lineMedium * mmToLineUnit, lineend = "square",
              show.legend = FALSE) +
  geom_boxplot(width = 0.3, color = "black",
               linewidth = lineMedium * mmToLineUnit, lineend = "square",
               outlier.shape = NA
  ) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
  stat_summary(
    aes(group = pnOver), fun = mean,
    geom = "point", shape = 21, size = 0.5,
    fill = "black", color = "black"
  ) +
  geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  )+
  annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
                                                "ps12: ", convPvalue(ps12), "\n",
                                                "ps23: ", convPvalue(ps23), "\n",
                                                "ps34: ", convPvalue(ps34), "\n",
                                                "ps24: ", convPvalue(ps24), "\n", 
                                                "ps14: ", convPvalue(ps14), "\n",
                                                "ps04: ", convPvalue(ps04), "\n"), 
           color = "black", hjust = 0, size = 2) +
    scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
  coord_cartesian(ylim = c(-0.5, 0.1))



fileName <- paste0("diffScore_barplot_RAD21peakWithin10kb_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-38*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()
```

##### log2FC
```{r}
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = pnOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
  stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
  labs(
    x = "Abs. log2(fold change)",
    y = "Cumulative Probability"
  ) + coord_cartesian(xlim = c(0, 1.5)) +
  theme_classic() + # Clean theme
  theme(
      axis.title = element_text(
        size = fontSizeM,
      family = fontType,
      color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS,
      family = fontType,
      color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000",
      size = lineThick*mmToLineUnit,
      lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.position = "none",
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
    ) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_psGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

```

### [3.44] Checking RAD21 peak density at group1 and group2 promoter
```{r}

peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene


##########################################################
name <- "chromo_cons_annoHierarchy"

geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
                                      diffCutoff = 0.2,
                                      annoList = c("P-E"))

temp1 <- geneAnnoData %>% dplyr::filter(A1 == "E") %>% dplyr::select(chrom1, start1, end1, gene)
colnames(temp1) <- c("chr", "start", "end", "gene")
temp2 <- geneAnnoData %>% dplyr::filter(A2 == "E") %>% dplyr::select(chrom2, start2, end2, gene)
colnames(temp2) <- c("chr", "start", "end", "gene")

enhAnchors <- bind_rows(temp1, temp2) %>% unnest(gene) %>%
  dplyr::mutate(
    group = case_when(
      gene %in% group1 ~ "Grp1",
      gene %in% group2 ~ "Grp2",
      TRUE ~ NA
    )
  ) %>% dplyr::filter(!is.na(group))

enh <- makeGRangesFromDataFrame(enhAnchors %>% dplyr::select(c(1, 2, 3)))

# Count overlaps between peaks and genes
overlap_counts <- countOverlaps(enh, peak.RAD21)

# Add overlap counts to the original gene.tb data
enhAnchors$peak_count <- overlap_counts

enhAnchors <- enhAnchors %>% dplyr::mutate(
  peakGroup = case_when(
    peak_count == 0 ~ "peakNum0",
    peak_count == 1 ~ "peakNum1",
    peak_count == 2 ~ "peakNum2",
    peak_count == 3 ~ "peakNum3",
    peak_count >= 4 ~ "peakNumOver4"
  )
)

aaa <- enhAnchors %>% 
  dplyr::select(group, peakGroup, gene) %>%
  dplyr::mutate(hasPeak = ifelse(peakGroup == "peakNum0", FALSE, TRUE))

bbb <- aaa %>% dplyr::filter(group == "Grp1") 
sum(bbb$hasPeak)/nrow(bbb)
bbb <- aaa %>% dplyr::filter(group == "Grp2") 
sum(bbb$hasPeak)/nrow(bbb)


aaa_summary <- aaa %>%
  group_by(group, peakGroup) %>%
  summarize(count = n(), .groups = "drop")

# Create the stacked barplot
aaa_summary <- aaa %>%
  group_by(group, peakGroup) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(group) %>%
  mutate(ratio = count / sum(count))

# Define the gradient colors
# gradient_colors <- c("#D4D4D4", "#DCB0AF", "#E48D8A", "#EC6965", "#F44641")
gradient_colors <- c("#D4D4D4", "#A2BBCA", "#71A2C0", "#3F89B7", "#0E71AD")

# Create the stacked barplot with gradient colors
p <- ggplot(aaa_summary, aes(x = group, y = ratio, fill = peakGroup)) +
  geom_bar(stat = "identity", position = "stack",
           linewidth = lineMedium * mmToLineUnit, lineend = "square") +
  theme_classic() + labs(x = NULL , y = "Ratio") +
  theme(
    axis.title = element_text(
      size = fontSizeM, family = fontType, color = "#000000"
    ),
    axis.text = element_text(
      size = fontSizeS, family = fontType, color = "#000000"
    ),
    axis.line = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    axis.ticks = element_line(
      color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
    ),
    panel.background = element_rect(fill = "transparent"),
    legend.text = element_text(family = fontType, size = fontSizeS),
    legend.title = element_text(family = fontType, size = fontSizeS)
  ) +
  scale_fill_manual(values = gradient_colors, 
                    guide = guide_legend(
      override.aes = list(size = 0.5), # Adjust legend key symbol size
      keywidth = 2 / 2.54, # Convert 2mm to cm
      keyheight = 2 / 2.54 # Convert 2mm to cm
    )) 

width <- panelSize(1.7)*mmToInch
height <- panelSize(1.3)*mmToInch
fileName <- paste0("rad21perc_enh")
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()  

```

# NOT CONCLUSIVE BELOW HERE
################################################################################

```{r}

data <- fread(here(resultDir, "chromHMM", "A485_pe-pe_anchors", "overlap_enrich_100_state.txt"))
colnames(data) <- c("state", "genome", "pe-pe_A485_down", "pe-pe_A485_no", "pe-pe_A485_up")
data <- data %>% dplyr::select(-genome) %>% as.matrix()

  column_to_rownames(var = "state") %>%
  as.matrix()

library(circlize)

col_fun <- colorRamp2(c(0, 1, 10), 
                      c("blue", "white", "red"))

library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))


#fviz_nbclust(data, kmeans, method = "wss")

p <- Heatmap(
  data,
  name = "Odds Ratio",                   # Name of the heatmap legend
  cluster_columns = FALSE,            # Remove column dendrogram
  row_km = 10,                         # Define the number of k-means clusters for rows (adjust as needed)
  show_row_dend = FALSE,
  col = col_fun,
  border = TRUE
)

fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),  height = height, width = width)
print(p)
dev.off()

```



#################################################################################
#################################################################################
#################################################################################
#################################################################################
#################################################################################

